gcc:
[official-gcc.git] / gcc / cpplex.c
blob025d72986fb1f34d40c403c4fd7a9aa40ea24fa2
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "cpphash.h"
/* NOTE(review): this listing appears to have lost its brace-only
   lines (see the gaps in the embedded line numbers), so the enum and
   struct bodies below are shown without their enclosing braces.  */

/* How a token type is spelled when it is written back out.  */
27 enum spell_type
/* Spelled from a fixed table entry (or the digraph table, or the
   identifier node for a named operator; see cpp_spell_token).  */
29 SPELL_OPERATOR = 0,
/* Spelled from the identifier node held in the token's value.  */
30 SPELL_IDENT,
/* Spelled from the string (text and length) held in the token's value.  */
31 SPELL_LITERAL,
/* Has no spelling; cpp_spell_token reports an internal error for it.  */
32 SPELL_NONE
/* One entry of the per-token-type spelling table: how the type is
   spelled, and its fixed name.  */
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
/* Alternative spellings used for tokens flagged DIGRAPH.  Indexed by
   the token type minus CPP_FIRST_DIGRAPH.  */
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
/* Expand TTYPE_TABLE (defined outside this file) into the spelling
   table.  OP entries are SPELL_OPERATOR with spelling S; TK entries
   take their category from S and are named by the stringized
   enumerator E.  */
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
/* Convenience accessors: look up TOKEN's spelling category and fixed
   name by its type.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of the file-local helpers.  */
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57 static void lex_number (cpp_reader *, cpp_string *);
58 static bool forms_identifier_p (cpp_reader *, int);
59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
70 /* Utility routine:
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 int
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
78 return 0;
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
85 static void
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
/* NOTE(review): this listing appears to have lost its brace-only and
   `do' lines (see the gaps in the embedded line numbers); read the
   nesting from the statement order below.  */
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
/* The line is cleaned in place: S is the read pointer and D the write
   pointer, both starting one byte before buffer->next_line.  Every
   escaped newline or trigraph found is recorded with add_line_note so
   that _cpp_process_line_notes can diagnose it later.  */
102 void
103 _cpp_clean_line (cpp_reader *pfile)
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
/* Start a new logical line: forget the previous line's notes and
   point cur and line_base at the start of the next physical line.  */
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
/* Buffers not marked from_stage3 get the full treatment; the else
   branch further down only searches for the end of the line.  */
115 if (!buffer->from_stage3)
117 d = (uchar *) s;
119 for (;;)
/* Copy one byte from the read position to the write position.  */
121 c = *++s;
122 *++d = c;
124 if (c == '\n' || c == '\r')
126 /* Handle DOS line endings. */
127 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
128 s++;
/* Stop at the end of the buffer.  */
129 if (s == buffer->rlimit)
130 break;
132 /* Escaped? */
/* Step back over horizontal space in the cleaned text looking for a
   backslash; without one this newline really ends the line.  */
133 p = d;
134 while (p != buffer->next_line && is_nvspace (p[-1]))
135 p--;
136 if (p == buffer->next_line || p[-1] != '\\')
137 break;
/* Note type ' ' records that space separated the backslash from the
   newline; '\\' records a plain escaped newline.  The write pointer
   is then backed up so the next byte copied overwrites the
   backslash.  */
139 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
140 d = p - 2;
141 buffer->next_line = p - 1;
143 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
145 /* Add a note regardless, for the benefit of -Wtrigraphs. */
/* The note type is the third character of the trigraph.  */
146 add_line_note (buffer, d, s[2]);
/* Only replace the trigraph when the trigraphs option is on; the
   replacement overwrites the '?' just copied and the other two
   source characters are skipped.  */
147 if (CPP_OPTION (pfile, trigraphs))
149 *d = _cpp_trigraph_map[s[2]];
150 s += 2;
155 else
/* No cleaning in this case: just advance to the line terminator.  */
158 s++;
159 while (*s != '\n' && *s != '\r');
160 d = (uchar *) s;
162 /* Handle DOS line endings. */
163 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
164 s++;
/* Terminate the cleaned line with '\n' and remember where the next
   physical line starts.  */
167 *d = '\n';
168 /* A sentinel note that should never be processed. */
169 add_line_note (buffer, d + 1, '\n');
170 buffer->next_line = s + 1;
173 /* Return true if the trigraph indicated by NOTE should be warned
174 about in a comment. */
175 static bool
176 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
178 const uchar *p;
180 /* Within comments we don't warn about trigraphs, unless the
181 trigraph forms an escaped newline, as that may change
182 behavior. */
183 if (note->type != '/')
184 return false;
186 /* If -trigraphs, then this was an escaped newline iff the next note
187 is coincident. */
188 if (CPP_OPTION (pfile, trigraphs))
189 return note[1].pos == note->pos;
191 /* Otherwise, see if this forms an escaped newline. */
192 p = note->pos + 3;
193 while (is_nvspace (*p))
194 p++;
196 /* There might have been escaped newlines between the trigraph and the
197 newline we found. Hence the position test. */
198 return (*p == '\n' && p < note[1].pos);
201 /* Process the notes created by add_line_note as far as the current
202 location. */
203 void
204 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
206 cpp_buffer *buffer = pfile->buffer;
208 for (;;)
210 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
211 unsigned int col;
213 if (note->pos > buffer->cur)
214 break;
216 buffer->cur_note++;
217 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
219 if (note->type == '\\' || note->type == ' ')
221 if (note->type == ' ' && !in_comment)
222 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
223 "backslash and newline separated by space");
225 if (buffer->next_line > buffer->rlimit)
227 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
228 "backslash-newline at end of file");
229 /* Prevent "no newline at end of file" warning. */
230 buffer->next_line = buffer->rlimit;
233 buffer->line_base = note->pos;
234 pfile->line++;
236 else if (_cpp_trigraph_map[note->type])
238 if (CPP_OPTION (pfile, warn_trigraphs)
239 && (!in_comment || warn_in_comment (pfile, note)))
241 if (CPP_OPTION (pfile, trigraphs))
242 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
243 "trigraph ??%c converted to %c",
244 note->type,
245 (int) _cpp_trigraph_map[note->type]);
246 else
247 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
248 "trigraph ??%c ignored",
249 note->type);
252 else
253 abort ();
257 /* Skip a C-style block comment. We find the end of the comment by
258 seeing if an asterisk is before every '/' we encounter. Returns
259 nonzero if comment terminated by EOF, zero otherwise.
261 Buffer->cur points to the initial asterisk of the comment. */
262 bool
263 _cpp_skip_block_comment (cpp_reader *pfile)
265 cpp_buffer *buffer = pfile->buffer;
266 cppchar_t c;
268 buffer->cur++;
269 if (*buffer->cur == '/')
270 buffer->cur++;
272 for (;;)
274 c = *buffer->cur++;
276 /* People like decorating comments with '*', so check for '/'
277 instead for efficiency. */
278 if (c == '/')
280 if (buffer->cur[-2] == '*')
281 break;
283 /* Warn about potential nested comments, but not if the '/'
284 comes immediately before the true comment delimiter.
285 Don't bother to get it right across escaped newlines. */
286 if (CPP_OPTION (pfile, warn_comments)
287 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
288 cpp_error_with_line (pfile, DL_WARNING,
289 pfile->line, CPP_BUF_COL (buffer),
290 "\"/*\" within comment");
292 else if (c == '\n')
294 buffer->cur--;
295 _cpp_process_line_notes (pfile, true);
296 if (buffer->next_line >= buffer->rlimit)
297 return true;
298 _cpp_clean_line (pfile);
299 pfile->line++;
303 _cpp_process_line_notes (pfile, true);
304 return false;
307 /* Skip a C++ line comment, leaving buffer->cur pointing to the
308 terminating newline. Handles escaped newlines. Returns nonzero
309 if a multiline comment. */
310 static int
311 skip_line_comment (cpp_reader *pfile)
313 cpp_buffer *buffer = pfile->buffer;
314 unsigned int orig_line = pfile->line;
316 while (*buffer->cur != '\n')
317 buffer->cur++;
319 _cpp_process_line_notes (pfile, true);
320 return orig_line != pfile->line;
323 /* Skips whitespace, saving the next non-whitespace character. */
324 static void
325 skip_whitespace (cpp_reader *pfile, cppchar_t c)
327 cpp_buffer *buffer = pfile->buffer;
328 bool saw_NUL = false;
332 /* Horizontal space always OK. */
333 if (c == ' ' || c == '\t')
335 /* Just \f \v or \0 left. */
336 else if (c == '\0')
337 saw_NUL = true;
338 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
339 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
340 CPP_BUF_COL (buffer),
341 "%s in preprocessing directive",
342 c == '\f' ? "form feed" : "vertical tab");
344 c = *buffer->cur++;
346 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
347 while (is_nvspace (c));
349 if (saw_NUL)
350 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
352 buffer->cur--;
355 /* See if the characters of a number token are valid in a name (no
356 '.', '+' or '-'). */
357 static int
358 name_p (cpp_reader *pfile, const cpp_string *string)
360 unsigned int i;
362 for (i = 0; i < string->len; i++)
363 if (!is_idchar (string->text[i]))
364 return 0;
366 return 1;
369 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
370 an identifier. FIRST is TRUE if this starts an identifier. */
371 static bool
372 forms_identifier_p (cpp_reader *pfile, int first)
374 cpp_buffer *buffer = pfile->buffer;
376 if (*buffer->cur == '$')
378 if (!CPP_OPTION (pfile, dollars_in_ident))
379 return false;
381 buffer->cur++;
382 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
384 CPP_OPTION (pfile, warn_dollars) = 0;
385 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
388 return true;
391 /* Is this a syntactically valid UCN? */
392 if (0 && *buffer->cur == '\\'
393 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
395 buffer->cur += 2;
396 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
397 return true;
398 buffer->cur -= 2;
401 return false;
404 /* Lex an identifier starting at BUFFER->CUR - 1. */
405 static cpp_hashnode *
406 lex_identifier (cpp_reader *pfile, const uchar *base)
408 cpp_hashnode *result;
409 const uchar *cur;
413 cur = pfile->buffer->cur;
415 /* N.B. ISIDNUM does not include $. */
416 while (ISIDNUM (*cur))
417 cur++;
419 pfile->buffer->cur = cur;
421 while (forms_identifier_p (pfile, false));
423 result = (cpp_hashnode *)
424 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
426 /* Rarely, identifiers require diagnostics when lexed. */
427 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
428 && !pfile->state.skipping, 0))
430 /* It is allowed to poison the same identifier twice. */
431 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
432 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
433 NODE_NAME (result));
435 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
436 replacement list of a variadic macro. */
437 if (result == pfile->spec_nodes.n__VA_ARGS__
438 && !pfile->state.va_args_ok)
439 cpp_error (pfile, DL_PEDWARN,
440 "__VA_ARGS__ can only appear in the expansion"
441 " of a C99 variadic macro");
444 return result;
447 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
448 static void
449 lex_number (cpp_reader *pfile, cpp_string *number)
451 const uchar *cur;
452 const uchar *base;
453 uchar *dest;
455 base = pfile->buffer->cur - 1;
458 cur = pfile->buffer->cur;
460 /* N.B. ISIDNUM does not include $. */
461 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
462 cur++;
464 pfile->buffer->cur = cur;
466 while (forms_identifier_p (pfile, false));
468 number->len = cur - base;
469 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
470 memcpy (dest, base, number->len);
471 dest[number->len] = '\0';
472 number->text = dest;
475 /* Create a token of type TYPE with a literal spelling. */
476 static void
477 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
478 unsigned int len, enum cpp_ttype type)
480 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
482 memcpy (dest, base, len);
483 dest[len] = '\0';
484 token->type = type;
485 token->val.str.len = len;
486 token->val.str.text = dest;
489 /* Lexes a string, character constant, or angle-bracketed header file
490 name. The stored string contains the spelling, including opening
491 quote and leading any leading 'L'. It returns the type of the
492 literal, or CPP_OTHER if it was not properly terminated.
494 The spelling is NUL-terminated, but it is not guaranteed that this
495 is the first NUL since embedded NULs are preserved. */
496 static void
497 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
499 bool saw_NUL = false;
500 const uchar *cur;
501 cppchar_t terminator;
502 enum cpp_ttype type;
504 cur = base;
505 terminator = *cur++;
506 if (terminator == 'L')
507 terminator = *cur++;
508 if (terminator == '\"')
509 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
510 else if (terminator == '\'')
511 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
512 else
513 terminator = '>', type = CPP_HEADER_NAME;
515 for (;;)
517 cppchar_t c = *cur++;
519 /* In #include-style directives, terminators are not escapable. */
520 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
521 cur++;
522 else if (c == terminator)
523 break;
524 else if (c == '\n')
526 cur--;
527 type = CPP_OTHER;
528 break;
530 else if (c == '\0')
531 saw_NUL = true;
534 if (saw_NUL && !pfile->state.skipping)
535 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
537 pfile->buffer->cur = cur;
538 create_literal (pfile, token, base, cur - base, type);
541 /* The stored comment includes the comment start and any terminator. */
542 static void
543 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
544 cppchar_t type)
546 unsigned char *buffer;
547 unsigned int len, clen;
549 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
551 /* C++ comments probably (not definitely) have moved past a new
552 line, which we don't want to save in the comment. */
553 if (is_vspace (pfile->buffer->cur[-1]))
554 len--;
556 /* If we are currently in a directive, then we need to store all
557 C++ comments as C comments internally, and so we need to
558 allocate a little extra space in that case.
560 Note that the only time we encounter a directive here is
561 when we are saving comments in a "#define". */
562 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
564 buffer = _cpp_unaligned_alloc (pfile, clen);
566 token->type = CPP_COMMENT;
567 token->val.str.len = clen;
568 token->val.str.text = buffer;
570 buffer[0] = '/';
571 memcpy (buffer + 1, from, len - 1);
573 /* Finish conversion to a C comment, if necessary. */
574 if (pfile->state.in_directive && type == '/')
576 buffer[1] = '*';
577 buffer[clen - 2] = '*';
578 buffer[clen - 1] = '/';
582 /* Allocate COUNT tokens for RUN. */
583 void
584 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
586 run->base = xnewvec (cpp_token, count);
587 run->limit = run->base + count;
588 run->next = NULL;
591 /* Returns the next tokenrun, or creates one if there is none. */
592 static tokenrun *
593 next_tokenrun (tokenrun *run)
595 if (run->next == NULL)
597 run->next = xnew (tokenrun);
598 run->next->prev = run;
599 _cpp_init_tokenrun (run->next, 250);
602 return run->next;
605 /* Allocate a single token that is invalidated at the same time as the
606 rest of the tokens on the line. Has its line and col set to the
607 same as the last lexed token, so that diagnostics appear in the
608 right place. */
609 cpp_token *
610 _cpp_temp_token (cpp_reader *pfile)
612 cpp_token *old, *result;
614 old = pfile->cur_token - 1;
615 if (pfile->cur_token == pfile->cur_run->limit)
617 pfile->cur_run = next_tokenrun (pfile->cur_run);
618 pfile->cur_token = pfile->cur_run->base;
621 result = pfile->cur_token++;
622 result->line = old->line;
623 result->col = old->col;
624 return result;
627 /* Lex a token into RESULT (external interface). Takes care of issues
628 like directive handling, token lookahead, multiple include
629 optimization and skipping. */
630 const cpp_token *
631 _cpp_lex_token (cpp_reader *pfile)
633 cpp_token *result;
635 for (;;)
637 if (pfile->cur_token == pfile->cur_run->limit)
639 pfile->cur_run = next_tokenrun (pfile->cur_run);
640 pfile->cur_token = pfile->cur_run->base;
643 if (pfile->lookaheads)
645 pfile->lookaheads--;
646 result = pfile->cur_token++;
648 else
649 result = _cpp_lex_direct (pfile);
651 if (result->flags & BOL)
653 /* Is this a directive. If _cpp_handle_directive returns
654 false, it is an assembler #. */
655 if (result->type == CPP_HASH
656 /* 6.10.3 p 11: Directives in a list of macro arguments
657 gives undefined behavior. This implementation
658 handles the directive as normal. */
659 && pfile->state.parsing_args != 1
660 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
661 continue;
662 if (pfile->cb.line_change && !pfile->state.skipping)
663 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
666 /* We don't skip tokens in directives. */
667 if (pfile->state.in_directive)
668 break;
670 /* Outside a directive, invalidate controlling macros. At file
671 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
672 get here and MI optimization works. */
673 pfile->mi_valid = false;
675 if (!pfile->state.skipping || result->type == CPP_EOF)
676 break;
679 return result;
682 /* Returns true if a fresh line has been loaded. */
683 bool
684 _cpp_get_fresh_line (cpp_reader *pfile)
686 /* We can't get a new line until we leave the current directive. */
687 if (pfile->state.in_directive)
688 return false;
690 for (;;)
692 cpp_buffer *buffer = pfile->buffer;
694 if (!buffer->need_line)
695 return true;
697 if (buffer->next_line < buffer->rlimit)
699 _cpp_clean_line (pfile);
700 return true;
703 /* First, get out of parsing arguments state. */
704 if (pfile->state.parsing_args)
705 return false;
707 /* End of buffer. Non-empty files should end in a newline. */
708 if (buffer->buf != buffer->rlimit
709 && buffer->next_line > buffer->rlimit
710 && !buffer->from_stage3)
712 /* Only warn once. */
713 buffer->next_line = buffer->rlimit;
714 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
715 CPP_BUF_COLUMN (buffer, buffer->cur),
716 "no newline at end of file");
719 if (!buffer->prev)
720 return false;
722 if (buffer->return_at_eof)
724 _cpp_pop_buffer (pfile);
725 return false;
728 _cpp_pop_buffer (pfile);
/* Helper for _cpp_lex_direct; uses that function's BUFFER and RESULT
   variables.  If the next character is CHAR, consume it and make the
   token THEN_TYPE; otherwise make the token ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
/* NOTE(review): this listing appears to have lost its brace-only
   lines (see the gaps in the embedded line numbers); read the nesting
   from the statement order below.  */
741 /* Lex a token into pfile->cur_token, which is also incremented, to
742 get diagnostics pointing to the correct location.
744 Does not handle issues such as token lookahead, multiple-include
745 optimization, directives, skipping etc. This function is only
746 suitable for use by _cpp_lex_token, and in special cases like
747 lex_expansion_token which doesn't care for any of these issues.
749 When meeting a newline, returns CPP_EOF if parsing a directive,
750 otherwise returns to the start of the token buffer if permissible.
751 Returns the location of the lexed token. */
752 cpp_token *
753 _cpp_lex_direct (cpp_reader *pfile)
755 cppchar_t c;
756 cpp_buffer *buffer;
757 const unsigned char *comment_start;
758 cpp_token *result = pfile->cur_token++;
/* Re-entered from the newline case below whenever a line has been
   used up.  */
760 fresh_line:
761 result->flags = 0;
762 if (pfile->buffer->need_line)
/* No further line is available: hand back an EOF token.  */
764 if (!_cpp_get_fresh_line (pfile))
766 result->type = CPP_EOF;
767 if (!pfile->state.in_directive)
769 /* Tell the compiler the line number of the EOF token. */
770 result->line = pfile->line;
771 result->flags = BOL;
773 return result;
/* Unless tokens are being kept, restart token storage at the base
   run for the new line.  */
775 if (!pfile->keep_tokens)
777 pfile->cur_run = &pfile->base_run;
778 result = pfile->base_run.base;
779 pfile->cur_token = result + 1;
/* The first token lexed from a fresh line is flagged BOL.  */
781 result->flags = BOL;
782 if (pfile->state.parsing_args == 2)
783 result->flags |= PREV_WHITE;
785 buffer = pfile->buffer;
/* Re-entered after a skipped comment.  */
786 update_tokens_line:
787 result->line = pfile->line;
/* Re-entered after skipped whitespace.  */
789 skipped_white:
/* Handle any line notes that are due at the current position.  */
790 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
791 && !pfile->overlaid_buffer)
793 _cpp_process_line_notes (pfile, false);
794 result->line = pfile->line;
/* Consume the character that selects the kind of token.  */
796 c = *buffer->cur++;
797 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
799 switch (c)
801 case ' ': case '\t': case '\f': case '\v': case '\0':
802 result->flags |= PREV_WHITE;
803 skip_whitespace (pfile, c);
804 goto skipped_white;
/* End of the cleaned line: count it and go fetch another.  */
806 case '\n':
807 pfile->line++;
808 buffer->need_line = true;
809 goto fresh_line;
811 case '0': case '1': case '2': case '3': case '4':
812 case '5': case '6': case '7': case '8': case '9':
813 result->type = CPP_NUMBER;
814 lex_number (pfile, &result->val.str);
815 break;
817 case 'L':
818 /* 'L' may introduce wide characters or strings. */
819 if (*buffer->cur == '\'' || *buffer->cur == '"')
821 lex_string (pfile, result, buffer->cur - 1);
822 break;
824 /* Fall through. */
826 case '_':
827 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
828 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
829 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
830 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
831 case 'y': case 'z':
832 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
833 case 'G': case 'H': case 'I': case 'J': case 'K':
834 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
835 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
836 case 'Y': case 'Z':
837 result->type = CPP_NAME;
838 result->val.node = lex_identifier (pfile, buffer->cur - 1);
840 /* Convert named operators to their proper types. */
/* The token keeps its identifier node; only its type and the NAMED_OP
   flag change.  */
841 if (result->val.node->flags & NODE_OPERATOR)
843 result->flags |= NAMED_OP;
844 result->type = result->val.node->directive_index;
846 break;
848 case '\'':
849 case '"':
850 lex_string (pfile, result, buffer->cur - 1);
851 break;
853 case '/':
854 /* A potential block or line comment. */
855 comment_start = buffer->cur;
856 c = *buffer->cur;
858 if (c == '*')
860 if (_cpp_skip_block_comment (pfile))
861 cpp_error (pfile, DL_ERROR, "unterminated comment");
863 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
864 || CPP_IN_SYSTEM_HEADER (pfile)))
866 /* Warn about comments only if pedantically GNUC89, and not
867 in system headers. */
868 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
869 && ! buffer->warned_cplusplus_comments)
871 cpp_error (pfile, DL_PEDWARN,
872 "C++ style comments are not allowed in ISO C90");
873 cpp_error (pfile, DL_PEDWARN,
874 "(this will be reported only once per input file)");
875 buffer->warned_cplusplus_comments = 1;
878 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
879 cpp_error (pfile, DL_WARNING, "multi-line comment");
881 else if (c == '=')
883 buffer->cur++;
884 result->type = CPP_DIV_EQ;
885 break;
887 else
889 result->type = CPP_DIV;
890 break;
/* Only the two comment branches above reach this point.  A comment
   that is not being saved is treated as whitespace.  */
893 if (!pfile->state.save_comments)
895 result->flags |= PREV_WHITE;
896 goto update_tokens_line;
899 /* Save the comment as a token in its own right. */
900 save_comment (pfile, result, comment_start, c);
901 break;
903 case '<':
/* When angled_headers is set, '<' opens a header name.  */
904 if (pfile->state.angled_headers)
906 lex_string (pfile, result, buffer->cur - 1);
907 break;
910 result->type = CPP_LESS;
911 if (*buffer->cur == '=')
912 buffer->cur++, result->type = CPP_LESS_EQ;
913 else if (*buffer->cur == '<')
915 buffer->cur++;
916 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
918 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
920 buffer->cur++;
921 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
923 else if (CPP_OPTION (pfile, digraphs))
925 if (*buffer->cur == ':')
927 buffer->cur++;
928 result->flags |= DIGRAPH;
929 result->type = CPP_OPEN_SQUARE;
931 else if (*buffer->cur == '%')
933 buffer->cur++;
934 result->flags |= DIGRAPH;
935 result->type = CPP_OPEN_BRACE;
938 break;
940 case '>':
941 result->type = CPP_GREATER;
942 if (*buffer->cur == '=')
943 buffer->cur++, result->type = CPP_GREATER_EQ;
944 else if (*buffer->cur == '>')
946 buffer->cur++;
947 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
949 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
951 buffer->cur++;
952 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
954 break;
956 case '%':
957 result->type = CPP_MOD;
958 if (*buffer->cur == '=')
959 buffer->cur++, result->type = CPP_MOD_EQ;
960 else if (CPP_OPTION (pfile, digraphs))
962 if (*buffer->cur == ':')
964 buffer->cur++;
965 result->flags |= DIGRAPH;
966 result->type = CPP_HASH;
967 if (*buffer->cur == '%' && buffer->cur[1] == ':')
968 buffer->cur += 2, result->type = CPP_PASTE;
970 else if (*buffer->cur == '>')
972 buffer->cur++;
973 result->flags |= DIGRAPH;
974 result->type = CPP_CLOSE_BRACE;
977 break;
979 case '.':
980 result->type = CPP_DOT;
981 if (ISDIGIT (*buffer->cur))
983 result->type = CPP_NUMBER;
984 lex_number (pfile, &result->val.str);
986 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
987 buffer->cur += 2, result->type = CPP_ELLIPSIS;
988 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
989 buffer->cur++, result->type = CPP_DOT_STAR;
990 break;
992 case '+':
993 result->type = CPP_PLUS;
994 if (*buffer->cur == '+')
995 buffer->cur++, result->type = CPP_PLUS_PLUS;
996 else if (*buffer->cur == '=')
997 buffer->cur++, result->type = CPP_PLUS_EQ;
998 break;
1000 case '-':
1001 result->type = CPP_MINUS;
1002 if (*buffer->cur == '>')
1004 buffer->cur++;
1005 result->type = CPP_DEREF;
1006 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1007 buffer->cur++, result->type = CPP_DEREF_STAR;
1009 else if (*buffer->cur == '-')
1010 buffer->cur++, result->type = CPP_MINUS_MINUS;
1011 else if (*buffer->cur == '=')
1012 buffer->cur++, result->type = CPP_MINUS_EQ;
1013 break;
1015 case '&':
1016 result->type = CPP_AND;
1017 if (*buffer->cur == '&')
1018 buffer->cur++, result->type = CPP_AND_AND;
1019 else if (*buffer->cur == '=')
1020 buffer->cur++, result->type = CPP_AND_EQ;
1021 break;
1023 case '|':
1024 result->type = CPP_OR;
1025 if (*buffer->cur == '|')
1026 buffer->cur++, result->type = CPP_OR_OR;
1027 else if (*buffer->cur == '=')
1028 buffer->cur++, result->type = CPP_OR_EQ;
1029 break;
1031 case ':':
1032 result->type = CPP_COLON;
1033 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1034 buffer->cur++, result->type = CPP_SCOPE;
1035 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1037 buffer->cur++;
1038 result->flags |= DIGRAPH;
1039 result->type = CPP_CLOSE_SQUARE;
1041 break;
1043 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1044 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1045 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1046 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1047 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1049 case '?': result->type = CPP_QUERY; break;
1050 case '~': result->type = CPP_COMPL; break;
1051 case ',': result->type = CPP_COMMA; break;
1052 case '(': result->type = CPP_OPEN_PAREN; break;
1053 case ')': result->type = CPP_CLOSE_PAREN; break;
1054 case '[': result->type = CPP_OPEN_SQUARE; break;
1055 case ']': result->type = CPP_CLOSE_SQUARE; break;
1056 case '{': result->type = CPP_OPEN_BRACE; break;
1057 case '}': result->type = CPP_CLOSE_BRACE; break;
1058 case ';': result->type = CPP_SEMICOLON; break;
1060 /* @ is a punctuator in Objective-C. */
1061 case '@': result->type = CPP_ATSIGN; break;
1063 case '$':
1064 case '\\':
/* Un-read the character so forms_identifier_p can examine it.  If it
   does not start an identifier, consume it again and fall through to
   the default case, which makes a one-character CPP_OTHER token.  */
1066 const uchar *base = --buffer->cur;
1068 if (forms_identifier_p (pfile, true))
1070 result->type = CPP_NAME;
1071 result->val.node = lex_identifier (pfile, base);
1072 break;
1074 buffer->cur++;
1077 default:
1078 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1079 break;
1082 return result;
1085 /* An upper bound on the number of bytes needed to spell TOKEN.
1086 Does not include preceding whitespace. */
1087 unsigned int
1088 cpp_token_len (const cpp_token *token)
1090 unsigned int len;
1092 switch (TOKEN_SPELL (token))
1094 default: len = 4; break;
1095 case SPELL_LITERAL: len = token->val.str.len; break;
1096 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1099 return len;
1102 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1103 already contain the enough space to hold the token's spelling.
1104 Returns a pointer to the character after the last character written.
1105 FIXME: Would be nice if we didn't need the PFILE argument. */
1106 unsigned char *
1107 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1108 unsigned char *buffer)
1110 switch (TOKEN_SPELL (token))
1112 case SPELL_OPERATOR:
1114 const unsigned char *spelling;
1115 unsigned char c;
1117 if (token->flags & DIGRAPH)
1118 spelling
1119 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1120 else if (token->flags & NAMED_OP)
1121 goto spell_ident;
1122 else
1123 spelling = TOKEN_NAME (token);
1125 while ((c = *spelling++) != '\0')
1126 *buffer++ = c;
1128 break;
1130 spell_ident:
1131 case SPELL_IDENT:
1132 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1133 buffer += NODE_LEN (token->val.node);
1134 break;
1136 case SPELL_LITERAL:
1137 memcpy (buffer, token->val.str.text, token->val.str.len);
1138 buffer += token->val.str.len;
1139 break;
1141 case SPELL_NONE:
1142 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1143 break;
1146 return buffer;
1149 /* Returns TOKEN spelt as a null-terminated string. The string is
1150 freed when the reader is destroyed. Useful for diagnostics. */
1151 unsigned char *
1152 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1154 unsigned int len = cpp_token_len (token) + 1;
1155 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1157 end = cpp_spell_token (pfile, token, start);
1158 end[0] = '\0';
1160 return start;
1163 /* Used by C front ends, which really should move to using
1164 cpp_token_as_text. */
1165 const char *
1166 cpp_type2name (enum cpp_ttype type)
1168 return (const char *) token_spellings[type].name;
1171 /* Writes the spelling of token to FP, without any preceding space.
1172 Separated from cpp_spell_token for efficiency - to avoid stdio
1173 double-buffering. */
1174 void
1175 cpp_output_token (const cpp_token *token, FILE *fp)
1177 switch (TOKEN_SPELL (token))
1179 case SPELL_OPERATOR:
1181 const unsigned char *spelling;
1182 int c;
1184 if (token->flags & DIGRAPH)
1185 spelling
1186 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1187 else if (token->flags & NAMED_OP)
1188 goto spell_ident;
1189 else
1190 spelling = TOKEN_NAME (token);
1192 c = *spelling;
1194 putc (c, fp);
1195 while ((c = *++spelling) != '\0');
1197 break;
1199 spell_ident:
1200 case SPELL_IDENT:
1201 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1202 break;
1204 case SPELL_LITERAL:
1205 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1206 break;
1208 case SPELL_NONE:
1209 /* An error, most probably. */
1210 break;
1214 /* Compare two tokens. */
1216 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1218 if (a->type == b->type && a->flags == b->flags)
1219 switch (TOKEN_SPELL (a))
1221 default: /* Keep compiler happy. */
1222 case SPELL_OPERATOR:
1223 return 1;
1224 case SPELL_NONE:
1225 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1226 case SPELL_IDENT:
1227 return a->val.node == b->val.node;
1228 case SPELL_LITERAL:
1229 return (a->val.str.len == b->val.str.len
1230 && !memcmp (a->val.str.text, b->val.str.text,
1231 a->val.str.len));
1234 return 0;
1237 /* Returns nonzero if a space should be inserted to avoid an
1238 accidental token paste for output. For simplicity, it is
1239 conservative, and occasionally advises a space where one is not
1240 needed, e.g. "." and ".2". */
1242 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1243 const cpp_token *token2)
1245 enum cpp_ttype a = token1->type, b = token2->type;
1246 cppchar_t c;
1248 if (token1->flags & NAMED_OP)
1249 a = CPP_NAME;
1250 if (token2->flags & NAMED_OP)
1251 b = CPP_NAME;
1253 c = EOF;
1254 if (token2->flags & DIGRAPH)
1255 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1256 else if (token_spellings[b].category == SPELL_OPERATOR)
1257 c = token_spellings[b].name[0];
1259 /* Quickly get everything that can paste with an '='. */
1260 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1261 return 1;
1263 switch (a)
1265 case CPP_GREATER: return c == '>' || c == '?';
1266 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1267 case CPP_PLUS: return c == '+';
1268 case CPP_MINUS: return c == '-' || c == '>';
1269 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1270 case CPP_MOD: return c == ':' || c == '>';
1271 case CPP_AND: return c == '&';
1272 case CPP_OR: return c == '|';
1273 case CPP_COLON: return c == ':' || c == '>';
1274 case CPP_DEREF: return c == '*';
1275 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1276 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1277 case CPP_NAME: return ((b == CPP_NUMBER
1278 && name_p (pfile, &token2->val.str))
1279 || b == CPP_NAME
1280 || b == CPP_CHAR || b == CPP_STRING); /* L */
1281 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1282 || c == '.' || c == '+' || c == '-');
1283 /* UCNs */
1284 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1285 && b == CPP_NAME)
1286 || (CPP_OPTION (pfile, objc)
1287 && token1->val.str.text[0] == '@'
1288 && (b == CPP_NAME || b == CPP_STRING)));
1289 default: break;
1292 return 0;
1295 /* Output all the remaining tokens on the current line, and a newline
1296 character, to FP. Leading whitespace is removed. If there are
1297 macros, special token padding is not performed. */
1298 void
1299 cpp_output_line (cpp_reader *pfile, FILE *fp)
1301 const cpp_token *token;
1303 token = cpp_get_token (pfile);
1304 while (token->type != CPP_EOF)
1306 cpp_output_token (token, fp);
1307 token = cpp_get_token (pfile);
1308 if (token->flags & PREV_WHITE)
1309 putc (' ', fp);
1312 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF->cur
   and BUFF->limit.  Every macro argument is parenthesized so that
   any expression may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1330 /* Create a new allocation buffer. Place the control block at the end
1331 of the buffer, so that buffer overflows will cause immediate chaos. */
1332 static _cpp_buff *
1333 new_buff (size_t len)
1335 _cpp_buff *result;
1336 unsigned char *base;
1338 if (len < MIN_BUFF_SIZE)
1339 len = MIN_BUFF_SIZE;
1340 len = CPP_ALIGN (len);
1342 base = xmalloc (len + sizeof (_cpp_buff));
1343 result = (_cpp_buff *) (base + len);
1344 result->base = base;
1345 result->cur = base;
1346 result->limit = base + len;
1347 result->next = NULL;
1348 return result;
1351 /* Place a chain of unwanted allocation buffers on the free list. */
1352 void
1353 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1355 _cpp_buff *end = buff;
1357 while (end->next)
1358 end = end->next;
1359 end->next = pfile->free_buffs;
1360 pfile->free_buffs = buff;
1363 /* Return a free buffer of size at least MIN_SIZE. */
1364 _cpp_buff *
1365 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1367 _cpp_buff *result, **p;
1369 for (p = &pfile->free_buffs;; p = &(*p)->next)
1371 size_t size;
1373 if (*p == NULL)
1374 return new_buff (min_size);
1375 result = *p;
1376 size = result->limit - result->base;
1377 /* Return a buffer that's big enough, but don't waste one that's
1378 way too big. */
1379 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1380 break;
1383 *p = result->next;
1384 result->next = NULL;
1385 result->cur = result->base;
1386 return result;
1389 /* Creates a new buffer with enough space to hold the uncommitted
1390 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1391 the excess bytes to the new buffer. Chains the new buffer after
1392 BUFF, and returns the new buffer. */
1393 _cpp_buff *
1394 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1396 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1397 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1399 buff->next = new_buff;
1400 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1401 return new_buff;
1404 /* Creates a new buffer with enough space to hold the uncommitted
1405 remaining bytes of the buffer pointed to by BUFF, and at least
1406 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1407 Chains the new buffer before the buffer pointed to by BUFF, and
1408 updates the pointer to point to the new buffer. */
1409 void
1410 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1412 _cpp_buff *new_buff, *old_buff = *pbuff;
1413 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1415 new_buff = _cpp_get_buff (pfile, size);
1416 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1417 new_buff->next = old_buff;
1418 *pbuff = new_buff;
1421 /* Free a chain of buffers starting at BUFF. */
1422 void
1423 _cpp_free_buff (_cpp_buff *buff)
1425 _cpp_buff *next;
1427 for (; buff; buff = next)
1429 next = buff->next;
1430 free (buff->base);
1434 /* Allocate permanent, unaligned storage of length LEN. */
1435 unsigned char *
1436 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1438 _cpp_buff *buff = pfile->u_buff;
1439 unsigned char *result = buff->cur;
1441 if (len > (size_t) (buff->limit - result))
1443 buff = _cpp_get_buff (pfile, len);
1444 buff->next = pfile->u_buff;
1445 pfile->u_buff = buff;
1446 result = buff->cur;
1449 buff->cur = result + len;
1450 return result;
1453 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1454 That buffer is used for growing allocations when saving macro
1455 replacement lists in a #define, and when parsing an answer to an
1456 assertion in #assert, #unassert or #if (and therefore possibly
1457 whilst expanding macros). It therefore must not be used by any
1458 code that they might call: specifically the lexer and the guts of
1459 the macro expander.
1461 All existing other uses clearly fit this restriction: storing
1462 registered pragmas during initialization. */
1463 unsigned char *
1464 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1466 _cpp_buff *buff = pfile->a_buff;
1467 unsigned char *result = buff->cur;
1469 if (len > (size_t) (buff->limit - result))
1471 buff = _cpp_get_buff (pfile, len);
1472 buff->next = pfile->a_buff;
1473 pfile->a_buff = buff;
1474 result = buff->cur;
1477 buff->cur = result + len;
1478 return result;