* config/xtensa/xtensa-protos.h: Convert to ISO C90.
[official-gcc.git] / gcc / cpplex.c
blobf779dfb7bc2aac2c4c7e8cd8f7b09299466e4689
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "cpphash.h"
/* How the spelling of a token is obtained.  Each token type is
   assigned one of these categories in token_spellings.  */
enum spell_type
{
  SPELL_OPERATOR,	/* Fixed spelling stored in the table entry.  */
  SPELL_IDENT,		/* Spelled from the name of val.node.  */
  SPELL_LITERAL,	/* Spelled from the text held in val.str.  */
  SPELL_NONE		/* No spelling; cpp_spell_token reports an ICE.  */
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
/* Spellings of the digraph forms of punctuators.  The table is
   indexed by (token type - CPP_FIRST_DIGRAPH), so the order of the
   entries must not be changed.  */
static const unsigned char *const digraph_spellings[] =
{
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57 static void lex_number (cpp_reader *, cpp_string *);
58 static bool forms_identifier_p (cpp_reader *, int);
59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
70 /* Utility routine:
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 int
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
78 return 0;
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
85 static void
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
/* NOTE(review): in this listing the brace-only lines and the `do'
   keyword line of the do/while in the final `else' branch are absent;
   the gaps in the embedded line numbers show where they were.

   The line is cleaned in place: S reads the raw text and D writes the
   cleaned text into the same buffer.  Every escaped newline and
   every trigraph found is recorded with add_line_note.  On return
   BUFFER->cur and BUFFER->line_base point at the start of the line,
   the line ends in '\n', and BUFFER->next_line points just past the
   raw text that was consumed.  */
102 void
103 _cpp_clean_line (cpp_reader *pfile)
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
/* Start a new line: forget the previous line's notes.  */
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
/* S (and D below) start one byte early because the loops
   pre-increment before each access.  */
113 s = buffer->next_line - 1;
115 if (!buffer->from_stage3)
117 d = (uchar *) s;
119 for (;;)
121 c = *++s;
122 *++d = c;
124 if (c == '\n' || c == '\r')
126 /* Handle DOS line endings. */
127 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
128 s++;
/* A line end sitting at RLIMIT always terminates the line.  */
129 if (s == buffer->rlimit)
130 break;
132 /* Escaped? */
/* Walk P back from the newline over non-vertical whitespace, but no
   further than BUFFER->next_line; the line end is escaped only if a
   backslash is found immediately before P.  */
133 p = d;
134 while (p != buffer->next_line && is_nvspace (p[-1]))
135 p--;
136 if (p == buffer->next_line || p[-1] != '\\')
137 break;
/* The note type is ' ' when whitespace separated the backslash from
   the newline, and '\\' otherwise.  D is backed up so that the next
   `*++d' overwrites the backslash, and BUFFER->next_line becomes the
   lower bound for the backward scan at the following newline.  */
139 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
140 d = p - 2;
141 buffer->next_line = p - 1;
143 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
145 /* Add a note regardless, for the benefit of -Wtrigraphs. */
146 add_line_note (buffer, d, s[2]);
/* Only when trigraphs are enabled is the '?' just copied replaced by
   the mapped character and the remaining two source bytes skipped.  */
147 if (CPP_OPTION (pfile, trigraphs))
149 *d = _cpp_trigraph_map[s[2]];
150 s += 2;
/* from_stage3 buffers are not cleaned: just find the end of the line.  */
155 else
158 s++;
159 while (*s != '\n' && *s != '\r');
160 d = (uchar *) s;
162 /* Handle DOS line endings. */
163 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
164 s++;
/* Terminate the cleaned line with '\n' whatever the raw line ending
   was.  */
167 *d = '\n';
168 /* A sentinel note that should never be processed. */
169 add_line_note (buffer, d + 1, '\n');
170 buffer->next_line = s + 1;
173 /* Return true if the trigraph indicated by NOTE should be warned
174 about in a comment. */
175 static bool
176 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
178 const uchar *p;
180 /* Within comments we don't warn about trigraphs, unless the
181 trigraph forms an escaped newline, as that may change
182 behavior. */
183 if (note->type != '/')
184 return false;
186 /* If -trigraphs, then this was an escaped newline iff the next note
187 is coincident. */
188 if (CPP_OPTION (pfile, trigraphs))
189 return note[1].pos == note->pos;
191 /* Otherwise, see if this forms an escaped newline. */
192 p = note->pos + 3;
193 while (is_nvspace (*p))
194 p++;
196 /* There might have been escaped newlines between the trigraph and the
197 newline we found. Hence the position test. */
198 return (*p == '\n' && p < note[1].pos);
201 /* Process the notes created by add_line_note as far as the current
202 location. */
203 void
204 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
206 cpp_buffer *buffer = pfile->buffer;
208 for (;;)
210 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
211 unsigned int col;
213 if (note->pos > buffer->cur)
214 break;
216 buffer->cur_note++;
217 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
219 if (note->type == '\\' || note->type == ' ')
221 if (note->type == ' ' && !in_comment)
222 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
223 "backslash and newline separated by space");
225 if (buffer->next_line > buffer->rlimit)
227 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
228 "backslash-newline at end of file");
229 /* Prevent "no newline at end of file" warning. */
230 buffer->next_line = buffer->rlimit;
233 buffer->line_base = note->pos;
234 pfile->line++;
236 else if (_cpp_trigraph_map[note->type])
238 if (CPP_OPTION (pfile, warn_trigraphs)
239 && (!in_comment || warn_in_comment (pfile, note)))
241 if (CPP_OPTION (pfile, trigraphs))
242 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
243 "trigraph ??%c converted to %c",
244 note->type,
245 (int) _cpp_trigraph_map[note->type]);
246 else
248 cpp_error_with_line
249 (pfile, DL_WARNING, pfile->line, col,
250 "trigraph ??%c ignored, use -trigraphs to enable",
251 note->type);
255 else
256 abort ();
260 /* Skip a C-style block comment. We find the end of the comment by
261 seeing if an asterisk is before every '/' we encounter. Returns
262 nonzero if comment terminated by EOF, zero otherwise.
264 Buffer->cur points to the initial asterisk of the comment. */
265 bool
266 _cpp_skip_block_comment (cpp_reader *pfile)
268 cpp_buffer *buffer = pfile->buffer;
269 cppchar_t c;
271 buffer->cur++;
272 if (*buffer->cur == '/')
273 buffer->cur++;
275 for (;;)
277 c = *buffer->cur++;
279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
281 if (c == '/')
283 if (buffer->cur[-2] == '*')
284 break;
286 /* Warn about potential nested comments, but not if the '/'
287 comes immediately before the true comment delimiter.
288 Don't bother to get it right across escaped newlines. */
289 if (CPP_OPTION (pfile, warn_comments)
290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
295 else if (c == '\n')
297 buffer->cur--;
298 _cpp_process_line_notes (pfile, true);
299 if (buffer->next_line >= buffer->rlimit)
300 return true;
301 _cpp_clean_line (pfile);
302 pfile->line++;
306 _cpp_process_line_notes (pfile, true);
307 return false;
310 /* Skip a C++ line comment, leaving buffer->cur pointing to the
311 terminating newline. Handles escaped newlines. Returns nonzero
312 if a multiline comment. */
313 static int
314 skip_line_comment (cpp_reader *pfile)
316 cpp_buffer *buffer = pfile->buffer;
317 unsigned int orig_line = pfile->line;
319 while (*buffer->cur != '\n')
320 buffer->cur++;
322 _cpp_process_line_notes (pfile, true);
323 return orig_line != pfile->line;
326 /* Skips whitespace, saving the next non-whitespace character. */
327 static void
328 skip_whitespace (cpp_reader *pfile, cppchar_t c)
330 cpp_buffer *buffer = pfile->buffer;
331 bool saw_NUL = false;
335 /* Horizontal space always OK. */
336 if (c == ' ' || c == '\t')
338 /* Just \f \v or \0 left. */
339 else if (c == '\0')
340 saw_NUL = true;
341 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
342 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
343 CPP_BUF_COL (buffer),
344 "%s in preprocessing directive",
345 c == '\f' ? "form feed" : "vertical tab");
347 c = *buffer->cur++;
349 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
350 while (is_nvspace (c));
352 if (saw_NUL)
353 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
355 buffer->cur--;
358 /* See if the characters of a number token are valid in a name (no
359 '.', '+' or '-'). */
360 static int
361 name_p (cpp_reader *pfile, const cpp_string *string)
363 unsigned int i;
365 for (i = 0; i < string->len; i++)
366 if (!is_idchar (string->text[i]))
367 return 0;
369 return 1;
372 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
373 an identifier. FIRST is TRUE if this starts an identifier. */
374 static bool
375 forms_identifier_p (cpp_reader *pfile, int first)
377 cpp_buffer *buffer = pfile->buffer;
379 if (*buffer->cur == '$')
381 if (!CPP_OPTION (pfile, dollars_in_ident))
382 return false;
384 buffer->cur++;
385 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
387 CPP_OPTION (pfile, warn_dollars) = 0;
388 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
391 return true;
394 /* Is this a syntactically valid UCN? */
395 if (0 && *buffer->cur == '\\'
396 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
398 buffer->cur += 2;
399 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
400 return true;
401 buffer->cur -= 2;
404 return false;
407 /* Lex an identifier starting at BUFFER->CUR - 1. */
408 static cpp_hashnode *
409 lex_identifier (cpp_reader *pfile, const uchar *base)
411 cpp_hashnode *result;
412 const uchar *cur;
416 cur = pfile->buffer->cur;
418 /* N.B. ISIDNUM does not include $. */
419 while (ISIDNUM (*cur))
420 cur++;
422 pfile->buffer->cur = cur;
424 while (forms_identifier_p (pfile, false));
426 result = (cpp_hashnode *)
427 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
429 /* Rarely, identifiers require diagnostics when lexed. */
430 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
431 && !pfile->state.skipping, 0))
433 /* It is allowed to poison the same identifier twice. */
434 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
435 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
436 NODE_NAME (result));
438 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
439 replacement list of a variadic macro. */
440 if (result == pfile->spec_nodes.n__VA_ARGS__
441 && !pfile->state.va_args_ok)
442 cpp_error (pfile, DL_PEDWARN,
443 "__VA_ARGS__ can only appear in the expansion"
444 " of a C99 variadic macro");
447 return result;
450 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
451 static void
452 lex_number (cpp_reader *pfile, cpp_string *number)
454 const uchar *cur;
455 const uchar *base;
456 uchar *dest;
458 base = pfile->buffer->cur - 1;
461 cur = pfile->buffer->cur;
463 /* N.B. ISIDNUM does not include $. */
464 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
465 cur++;
467 pfile->buffer->cur = cur;
469 while (forms_identifier_p (pfile, false));
471 number->len = cur - base;
472 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
473 memcpy (dest, base, number->len);
474 dest[number->len] = '\0';
475 number->text = dest;
478 /* Create a token of type TYPE with a literal spelling. */
479 static void
480 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
481 unsigned int len, enum cpp_ttype type)
483 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
485 memcpy (dest, base, len);
486 dest[len] = '\0';
487 token->type = type;
488 token->val.str.len = len;
489 token->val.str.text = dest;
492 /* Lexes a string, character constant, or angle-bracketed header file
493 name. The stored string contains the spelling, including opening
494 quote and leading any leading 'L'. It returns the type of the
495 literal, or CPP_OTHER if it was not properly terminated.
497 The spelling is NUL-terminated, but it is not guaranteed that this
498 is the first NUL since embedded NULs are preserved. */
499 static void
500 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
502 bool saw_NUL = false;
503 const uchar *cur;
504 cppchar_t terminator;
505 enum cpp_ttype type;
507 cur = base;
508 terminator = *cur++;
509 if (terminator == 'L')
510 terminator = *cur++;
511 if (terminator == '\"')
512 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
513 else if (terminator == '\'')
514 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
515 else
516 terminator = '>', type = CPP_HEADER_NAME;
518 for (;;)
520 cppchar_t c = *cur++;
522 /* In #include-style directives, terminators are not escapable. */
523 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
524 cur++;
525 else if (c == terminator)
526 break;
527 else if (c == '\n')
529 cur--;
530 type = CPP_OTHER;
531 break;
533 else if (c == '\0')
534 saw_NUL = true;
537 if (saw_NUL && !pfile->state.skipping)
538 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
540 pfile->buffer->cur = cur;
541 create_literal (pfile, token, base, cur - base, type);
544 /* The stored comment includes the comment start and any terminator. */
545 static void
546 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
547 cppchar_t type)
549 unsigned char *buffer;
550 unsigned int len, clen;
552 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
554 /* C++ comments probably (not definitely) have moved past a new
555 line, which we don't want to save in the comment. */
556 if (is_vspace (pfile->buffer->cur[-1]))
557 len--;
559 /* If we are currently in a directive, then we need to store all
560 C++ comments as C comments internally, and so we need to
561 allocate a little extra space in that case.
563 Note that the only time we encounter a directive here is
564 when we are saving comments in a "#define". */
565 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
567 buffer = _cpp_unaligned_alloc (pfile, clen);
569 token->type = CPP_COMMENT;
570 token->val.str.len = clen;
571 token->val.str.text = buffer;
573 buffer[0] = '/';
574 memcpy (buffer + 1, from, len - 1);
576 /* Finish conversion to a C comment, if necessary. */
577 if (pfile->state.in_directive && type == '/')
579 buffer[1] = '*';
580 buffer[clen - 2] = '*';
581 buffer[clen - 1] = '/';
585 /* Allocate COUNT tokens for RUN. */
586 void
587 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
589 run->base = xnewvec (cpp_token, count);
590 run->limit = run->base + count;
591 run->next = NULL;
594 /* Returns the next tokenrun, or creates one if there is none. */
595 static tokenrun *
596 next_tokenrun (tokenrun *run)
598 if (run->next == NULL)
600 run->next = xnew (tokenrun);
601 run->next->prev = run;
602 _cpp_init_tokenrun (run->next, 250);
605 return run->next;
608 /* Allocate a single token that is invalidated at the same time as the
609 rest of the tokens on the line. Has its line and col set to the
610 same as the last lexed token, so that diagnostics appear in the
611 right place. */
612 cpp_token *
613 _cpp_temp_token (cpp_reader *pfile)
615 cpp_token *old, *result;
617 old = pfile->cur_token - 1;
618 if (pfile->cur_token == pfile->cur_run->limit)
620 pfile->cur_run = next_tokenrun (pfile->cur_run);
621 pfile->cur_token = pfile->cur_run->base;
624 result = pfile->cur_token++;
625 result->line = old->line;
626 result->col = old->col;
627 return result;
630 /* Lex a token into RESULT (external interface). Takes care of issues
631 like directive handling, token lookahead, multiple include
632 optimization and skipping. */
633 const cpp_token *
634 _cpp_lex_token (cpp_reader *pfile)
636 cpp_token *result;
638 for (;;)
640 if (pfile->cur_token == pfile->cur_run->limit)
642 pfile->cur_run = next_tokenrun (pfile->cur_run);
643 pfile->cur_token = pfile->cur_run->base;
646 if (pfile->lookaheads)
648 pfile->lookaheads--;
649 result = pfile->cur_token++;
651 else
652 result = _cpp_lex_direct (pfile);
654 if (result->flags & BOL)
656 /* Is this a directive. If _cpp_handle_directive returns
657 false, it is an assembler #. */
658 if (result->type == CPP_HASH
659 /* 6.10.3 p 11: Directives in a list of macro arguments
660 gives undefined behavior. This implementation
661 handles the directive as normal. */
662 && pfile->state.parsing_args != 1
663 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
664 continue;
665 if (pfile->cb.line_change && !pfile->state.skipping)
666 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
669 /* We don't skip tokens in directives. */
670 if (pfile->state.in_directive)
671 break;
673 /* Outside a directive, invalidate controlling macros. At file
674 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
675 get here and MI optimization works. */
676 pfile->mi_valid = false;
678 if (!pfile->state.skipping || result->type == CPP_EOF)
679 break;
682 return result;
685 /* Returns true if a fresh line has been loaded. */
686 bool
687 _cpp_get_fresh_line (cpp_reader *pfile)
689 /* We can't get a new line until we leave the current directive. */
690 if (pfile->state.in_directive)
691 return false;
693 for (;;)
695 cpp_buffer *buffer = pfile->buffer;
697 if (buffer == NULL)
698 return false;
700 if (!buffer->need_line)
701 return true;
703 if (buffer->next_line < buffer->rlimit)
705 _cpp_clean_line (pfile);
706 return true;
709 /* First, get out of parsing arguments state. */
710 if (pfile->state.parsing_args)
711 return false;
713 /* End of buffer. Non-empty files should end in a newline. */
714 if (buffer->buf != buffer->rlimit
715 && buffer->next_line > buffer->rlimit
716 && !buffer->from_stage3)
718 /* Only warn once. */
719 buffer->next_line = buffer->rlimit;
720 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
721 CPP_BUF_COLUMN (buffer, buffer->cur),
722 "no newline at end of file");
725 if (buffer->return_at_eof)
727 _cpp_pop_buffer (pfile);
728 return false;
731 _cpp_pop_buffer (pfile);
/* Helper for _cpp_lex_direct, relying on its locals BUFFER and
   RESULT.  If the next input character is CHAR, consume it and give
   the token the type THEN_TYPE; otherwise give it ELSE_TYPE.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
  do						\
    {						\
      if (*buffer->cur == CHAR)			\
	{					\
	  buffer->cur++;			\
	  result->type = THEN_TYPE;		\
	}					\
      else					\
	result->type = ELSE_TYPE;		\
    }						\
  while (0)
744 /* Lex a token into pfile->cur_token, which is also incremented, to
745 get diagnostics pointing to the correct location.
747 Does not handle issues such as token lookahead, multiple-include
748 optimization, directives, skipping etc. This function is only
749 suitable for use by _cpp_lex_token, and in special cases like
750 lex_expansion_token which doesn't care for any of these issues.
752 When meeting a newline, returns CPP_EOF if parsing a directive,
753 otherwise returns to the start of the token buffer if permissible.
754 Returns the location of the lexed token. */
/* NOTE(review): brace-only lines are absent from this listing; the
   gaps in the embedded line numbers show where blocks open and close.

   The function is driven by three labels: `fresh_line' starts a new
   logical line, `update_tokens_line' refreshes the token's line
   number, and `skipped_white' resumes lexing after whitespace.  */
755 cpp_token *
756 _cpp_lex_direct (cpp_reader *pfile)
758 cppchar_t c;
759 cpp_buffer *buffer;
760 const unsigned char *comment_start;
761 cpp_token *result = pfile->cur_token++;
763 fresh_line:
764 result->flags = 0;
765 buffer = pfile->buffer;
766 if (buffer == NULL || buffer->need_line)
/* No line could be supplied: report CPP_EOF.  */
768 if (!_cpp_get_fresh_line (pfile))
770 result->type = CPP_EOF;
771 if (!pfile->state.in_directive)
773 /* Tell the compiler the line number of the EOF token. */
774 result->line = pfile->line;
775 result->flags = BOL;
777 return result;
/* Unless tokens are being kept, restart at the first slot of the base
   token run so that the storage is reused for the new line.  */
779 if (!pfile->keep_tokens)
781 pfile->cur_run = &pfile->base_run;
782 result = pfile->base_run.base;
783 pfile->cur_token = result + 1;
/* The first token of a line is flagged BOL.  */
785 result->flags = BOL;
786 if (pfile->state.parsing_args == 2)
787 result->flags |= PREV_WHITE;
/* _cpp_get_fresh_line may have changed the current buffer.  */
788 buffer = pfile->buffer;
790 update_tokens_line:
791 result->line = pfile->line;
793 skipped_white:
/* Handle any line notes at or before the current position; doing so
   can advance pfile->line, so the token's line is refreshed.  */
794 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
795 && !pfile->overlaid_buffer)
797 _cpp_process_line_notes (pfile, false);
798 result->line = pfile->line;
800 c = *buffer->cur++;
801 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
/* Dispatch on the first character of the token.  */
803 switch (c)
805 case ' ': case '\t': case '\f': case '\v': case '\0':
806 result->flags |= PREV_WHITE;
807 skip_whitespace (pfile, c);
808 goto skipped_white;
/* End of the logical line: count it and request a new one.  */
810 case '\n':
811 pfile->line++;
812 buffer->need_line = true;
813 goto fresh_line;
815 case '0': case '1': case '2': case '3': case '4':
816 case '5': case '6': case '7': case '8': case '9':
817 result->type = CPP_NUMBER;
818 lex_number (pfile, &result->val.str);
819 break;
821 case 'L':
822 /* 'L' may introduce wide characters or strings. */
823 if (*buffer->cur == '\'' || *buffer->cur == '"')
825 lex_string (pfile, result, buffer->cur - 1);
826 break;
828 /* Fall through. */
830 case '_':
831 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
832 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
833 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
834 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
835 case 'y': case 'z':
836 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
837 case 'G': case 'H': case 'I': case 'J': case 'K':
838 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
839 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
840 case 'Y': case 'Z':
841 result->type = CPP_NAME;
842 result->val.node = lex_identifier (pfile, buffer->cur - 1);
844 /* Convert named operators to their proper types. */
/* val.node is left in place; the spelling routines use it for tokens
   flagged NAMED_OP.  */
845 if (result->val.node->flags & NODE_OPERATOR)
847 result->flags |= NAMED_OP;
848 result->type = result->val.node->directive_index;
850 break;
852 case '\'':
853 case '"':
854 lex_string (pfile, result, buffer->cur - 1);
855 break;
857 case '/':
858 /* A potential block or line comment. */
859 comment_start = buffer->cur;
860 c = *buffer->cur;
862 if (c == '*')
864 if (_cpp_skip_block_comment (pfile))
865 cpp_error (pfile, DL_ERROR, "unterminated comment");
867 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
868 || CPP_IN_SYSTEM_HEADER (pfile)))
870 /* Warn about comments only if pedantically GNUC89, and not
871 in system headers. */
872 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
873 && ! buffer->warned_cplusplus_comments)
875 cpp_error (pfile, DL_PEDWARN,
876 "C++ style comments are not allowed in ISO C90");
877 cpp_error (pfile, DL_PEDWARN,
878 "(this will be reported only once per input file)");
879 buffer->warned_cplusplus_comments = 1;
882 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
883 cpp_error (pfile, DL_WARNING, "multi-line comment");
885 else if (c == '=')
887 buffer->cur++;
888 result->type = CPP_DIV_EQ;
889 break;
891 else
893 result->type = CPP_DIV;
894 break;
/* Only the two comment branches reach this point.  A comment that is
   not being saved counts as whitespace; lexing resumes with the
   token's line number refreshed.  */
897 if (!pfile->state.save_comments)
899 result->flags |= PREV_WHITE;
900 goto update_tokens_line;
903 /* Save the comment as a token in its own right. */
904 save_comment (pfile, result, comment_start, c);
905 break;
907 case '<':
/* When a header name is expected, '<' opens one.  */
908 if (pfile->state.angled_headers)
910 lex_string (pfile, result, buffer->cur - 1);
911 break;
914 result->type = CPP_LESS;
915 if (*buffer->cur == '=')
916 buffer->cur++, result->type = CPP_LESS_EQ;
917 else if (*buffer->cur == '<')
919 buffer->cur++;
920 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
922 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
924 buffer->cur++;
925 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
927 else if (CPP_OPTION (pfile, digraphs))
929 if (*buffer->cur == ':')
931 buffer->cur++;
932 result->flags |= DIGRAPH;
933 result->type = CPP_OPEN_SQUARE;
935 else if (*buffer->cur == '%')
937 buffer->cur++;
938 result->flags |= DIGRAPH;
939 result->type = CPP_OPEN_BRACE;
942 break;
944 case '>':
945 result->type = CPP_GREATER;
946 if (*buffer->cur == '=')
947 buffer->cur++, result->type = CPP_GREATER_EQ;
948 else if (*buffer->cur == '>')
950 buffer->cur++;
951 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
953 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
955 buffer->cur++;
956 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
958 break;
960 case '%':
961 result->type = CPP_MOD;
962 if (*buffer->cur == '=')
963 buffer->cur++, result->type = CPP_MOD_EQ;
964 else if (CPP_OPTION (pfile, digraphs))
966 if (*buffer->cur == ':')
968 buffer->cur++;
969 result->flags |= DIGRAPH;
970 result->type = CPP_HASH;
971 if (*buffer->cur == '%' && buffer->cur[1] == ':')
972 buffer->cur += 2, result->type = CPP_PASTE;
974 else if (*buffer->cur == '>')
976 buffer->cur++;
977 result->flags |= DIGRAPH;
978 result->type = CPP_CLOSE_BRACE;
981 break;
983 case '.':
984 result->type = CPP_DOT;
985 if (ISDIGIT (*buffer->cur))
987 result->type = CPP_NUMBER;
988 lex_number (pfile, &result->val.str);
990 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
991 buffer->cur += 2, result->type = CPP_ELLIPSIS;
992 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
993 buffer->cur++, result->type = CPP_DOT_STAR;
994 break;
996 case '+':
997 result->type = CPP_PLUS;
998 if (*buffer->cur == '+')
999 buffer->cur++, result->type = CPP_PLUS_PLUS;
1000 else if (*buffer->cur == '=')
1001 buffer->cur++, result->type = CPP_PLUS_EQ;
1002 break;
1004 case '-':
1005 result->type = CPP_MINUS;
1006 if (*buffer->cur == '>')
1008 buffer->cur++;
1009 result->type = CPP_DEREF;
1010 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1011 buffer->cur++, result->type = CPP_DEREF_STAR;
1013 else if (*buffer->cur == '-')
1014 buffer->cur++, result->type = CPP_MINUS_MINUS;
1015 else if (*buffer->cur == '=')
1016 buffer->cur++, result->type = CPP_MINUS_EQ;
1017 break;
1019 case '&':
1020 result->type = CPP_AND;
1021 if (*buffer->cur == '&')
1022 buffer->cur++, result->type = CPP_AND_AND;
1023 else if (*buffer->cur == '=')
1024 buffer->cur++, result->type = CPP_AND_EQ;
1025 break;
1027 case '|':
1028 result->type = CPP_OR;
1029 if (*buffer->cur == '|')
1030 buffer->cur++, result->type = CPP_OR_OR;
1031 else if (*buffer->cur == '=')
1032 buffer->cur++, result->type = CPP_OR_EQ;
1033 break;
1035 case ':':
1036 result->type = CPP_COLON;
1037 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1038 buffer->cur++, result->type = CPP_SCOPE;
1039 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1041 buffer->cur++;
1042 result->flags |= DIGRAPH;
1043 result->type = CPP_CLOSE_SQUARE;
1045 break;
1047 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1048 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1049 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1050 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1051 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1053 case '?': result->type = CPP_QUERY; break;
1054 case '~': result->type = CPP_COMPL; break;
1055 case ',': result->type = CPP_COMMA; break;
1056 case '(': result->type = CPP_OPEN_PAREN; break;
1057 case ')': result->type = CPP_CLOSE_PAREN; break;
1058 case '[': result->type = CPP_OPEN_SQUARE; break;
1059 case ']': result->type = CPP_CLOSE_SQUARE; break;
1060 case '{': result->type = CPP_OPEN_BRACE; break;
1061 case '}': result->type = CPP_CLOSE_BRACE; break;
1062 case ';': result->type = CPP_SEMICOLON; break;
1064 /* @ is a punctuator in Objective-C. */
1065 case '@': result->type = CPP_ATSIGN; break;
/* '$' and '\\' start an identifier only if forms_identifier_p accepts
   them.  Otherwise the character is consumed again and control
   reaches the default case, which makes it a CPP_OTHER token.  */
1067 case '$':
1068 case '\\':
1070 const uchar *base = --buffer->cur;
1072 if (forms_identifier_p (pfile, true))
1074 result->type = CPP_NAME;
1075 result->val.node = lex_identifier (pfile, base);
1076 break;
1078 buffer->cur++;
/* Any other character becomes a one-character CPP_OTHER token.  */
1081 default:
1082 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1083 break;
1086 return result;
1089 /* An upper bound on the number of bytes needed to spell TOKEN.
1090 Does not include preceding whitespace. */
1091 unsigned int
1092 cpp_token_len (const cpp_token *token)
1094 unsigned int len;
1096 switch (TOKEN_SPELL (token))
1098 default: len = 4; break;
1099 case SPELL_LITERAL: len = token->val.str.len; break;
1100 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1103 return len;
1106 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1107 already contain the enough space to hold the token's spelling.
1108 Returns a pointer to the character after the last character written.
1109 FIXME: Would be nice if we didn't need the PFILE argument. */
1110 unsigned char *
1111 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1112 unsigned char *buffer)
1114 switch (TOKEN_SPELL (token))
1116 case SPELL_OPERATOR:
1118 const unsigned char *spelling;
1119 unsigned char c;
1121 if (token->flags & DIGRAPH)
1122 spelling
1123 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1124 else if (token->flags & NAMED_OP)
1125 goto spell_ident;
1126 else
1127 spelling = TOKEN_NAME (token);
1129 while ((c = *spelling++) != '\0')
1130 *buffer++ = c;
1132 break;
1134 spell_ident:
1135 case SPELL_IDENT:
1136 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1137 buffer += NODE_LEN (token->val.node);
1138 break;
1140 case SPELL_LITERAL:
1141 memcpy (buffer, token->val.str.text, token->val.str.len);
1142 buffer += token->val.str.len;
1143 break;
1145 case SPELL_NONE:
1146 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1147 break;
1150 return buffer;
1153 /* Returns TOKEN spelt as a null-terminated string. The string is
1154 freed when the reader is destroyed. Useful for diagnostics. */
1155 unsigned char *
1156 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1158 unsigned int len = cpp_token_len (token) + 1;
1159 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1161 end = cpp_spell_token (pfile, token, start);
1162 end[0] = '\0';
1164 return start;
1167 /* Used by C front ends, which really should move to using
1168 cpp_token_as_text. */
1169 const char *
1170 cpp_type2name (enum cpp_ttype type)
1172 return (const char *) token_spellings[type].name;
1175 /* Writes the spelling of token to FP, without any preceding space.
1176 Separated from cpp_spell_token for efficiency - to avoid stdio
1177 double-buffering. */
1178 void
1179 cpp_output_token (const cpp_token *token, FILE *fp)
1181 switch (TOKEN_SPELL (token))
1183 case SPELL_OPERATOR:
1185 const unsigned char *spelling;
1186 int c;
1188 if (token->flags & DIGRAPH)
1189 spelling
1190 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1191 else if (token->flags & NAMED_OP)
1192 goto spell_ident;
1193 else
1194 spelling = TOKEN_NAME (token);
1196 c = *spelling;
1198 putc (c, fp);
1199 while ((c = *++spelling) != '\0');
1201 break;
1203 spell_ident:
1204 case SPELL_IDENT:
1205 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1206 break;
1208 case SPELL_LITERAL:
1209 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1210 break;
1212 case SPELL_NONE:
1213 /* An error, most probably. */
1214 break;
1218 /* Compare two tokens. */
1220 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1222 if (a->type == b->type && a->flags == b->flags)
1223 switch (TOKEN_SPELL (a))
1225 default: /* Keep compiler happy. */
1226 case SPELL_OPERATOR:
1227 return 1;
1228 case SPELL_NONE:
1229 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1230 case SPELL_IDENT:
1231 return a->val.node == b->val.node;
1232 case SPELL_LITERAL:
1233 return (a->val.str.len == b->val.str.len
1234 && !memcmp (a->val.str.text, b->val.str.text,
1235 a->val.str.len));
1238 return 0;
1241 /* Returns nonzero if a space should be inserted to avoid an
1242 accidental token paste for output. For simplicity, it is
1243 conservative, and occasionally advises a space where one is not
1244 needed, e.g. "." and ".2". */
1246 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1247 const cpp_token *token2)
1249 enum cpp_ttype a = token1->type, b = token2->type;
1250 cppchar_t c;
1252 if (token1->flags & NAMED_OP)
1253 a = CPP_NAME;
1254 if (token2->flags & NAMED_OP)
1255 b = CPP_NAME;
1257 c = EOF;
1258 if (token2->flags & DIGRAPH)
1259 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1260 else if (token_spellings[b].category == SPELL_OPERATOR)
1261 c = token_spellings[b].name[0];
1263 /* Quickly get everything that can paste with an '='. */
1264 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1265 return 1;
1267 switch (a)
1269 case CPP_GREATER: return c == '>' || c == '?';
1270 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1271 case CPP_PLUS: return c == '+';
1272 case CPP_MINUS: return c == '-' || c == '>';
1273 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1274 case CPP_MOD: return c == ':' || c == '>';
1275 case CPP_AND: return c == '&';
1276 case CPP_OR: return c == '|';
1277 case CPP_COLON: return c == ':' || c == '>';
1278 case CPP_DEREF: return c == '*';
1279 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1280 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1281 case CPP_NAME: return ((b == CPP_NUMBER
1282 && name_p (pfile, &token2->val.str))
1283 || b == CPP_NAME
1284 || b == CPP_CHAR || b == CPP_STRING); /* L */
1285 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1286 || c == '.' || c == '+' || c == '-');
1287 /* UCNs */
1288 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1289 && b == CPP_NAME)
1290 || (CPP_OPTION (pfile, objc)
1291 && token1->val.str.text[0] == '@'
1292 && (b == CPP_NAME || b == CPP_STRING)));
1293 default: break;
1296 return 0;
1299 /* Output all the remaining tokens on the current line, and a newline
1300 character, to FP. Leading whitespace is removed. If there are
1301 macros, special token padding is not performed. */
1302 void
1303 cpp_output_line (cpp_reader *pfile, FILE *fp)
1305 const cpp_token *token;
1307 token = cpp_get_token (pfile);
1308 while (token->type != CPP_EOF)
1310 cpp_output_token (token, fp);
1311 token = cpp_get_token (pfile);
1312 if (token->flags & PREV_WHITE)
1313 putc (' ', fp);
1316 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest size new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that
   expressions containing lower-precedence operators expand
   correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1334 /* Create a new allocation buffer. Place the control block at the end
1335 of the buffer, so that buffer overflows will cause immediate chaos. */
1336 static _cpp_buff *
1337 new_buff (size_t len)
1339 _cpp_buff *result;
1340 unsigned char *base;
1342 if (len < MIN_BUFF_SIZE)
1343 len = MIN_BUFF_SIZE;
1344 len = CPP_ALIGN (len);
1346 base = xmalloc (len + sizeof (_cpp_buff));
1347 result = (_cpp_buff *) (base + len);
1348 result->base = base;
1349 result->cur = base;
1350 result->limit = base + len;
1351 result->next = NULL;
1352 return result;
1355 /* Place a chain of unwanted allocation buffers on the free list. */
1356 void
1357 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1359 _cpp_buff *end = buff;
1361 while (end->next)
1362 end = end->next;
1363 end->next = pfile->free_buffs;
1364 pfile->free_buffs = buff;
1367 /* Return a free buffer of size at least MIN_SIZE. */
1368 _cpp_buff *
1369 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1371 _cpp_buff *result, **p;
1373 for (p = &pfile->free_buffs;; p = &(*p)->next)
1375 size_t size;
1377 if (*p == NULL)
1378 return new_buff (min_size);
1379 result = *p;
1380 size = result->limit - result->base;
1381 /* Return a buffer that's big enough, but don't waste one that's
1382 way too big. */
1383 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1384 break;
1387 *p = result->next;
1388 result->next = NULL;
1389 result->cur = result->base;
1390 return result;
1393 /* Creates a new buffer with enough space to hold the uncommitted
1394 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1395 the excess bytes to the new buffer. Chains the new buffer after
1396 BUFF, and returns the new buffer. */
1397 _cpp_buff *
1398 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1400 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1401 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1403 buff->next = new_buff;
1404 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1405 return new_buff;
1408 /* Creates a new buffer with enough space to hold the uncommitted
1409 remaining bytes of the buffer pointed to by BUFF, and at least
1410 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1411 Chains the new buffer before the buffer pointed to by BUFF, and
1412 updates the pointer to point to the new buffer. */
1413 void
1414 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1416 _cpp_buff *new_buff, *old_buff = *pbuff;
1417 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1419 new_buff = _cpp_get_buff (pfile, size);
1420 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1421 new_buff->next = old_buff;
1422 *pbuff = new_buff;
1425 /* Free a chain of buffers starting at BUFF. */
1426 void
1427 _cpp_free_buff (_cpp_buff *buff)
1429 _cpp_buff *next;
1431 for (; buff; buff = next)
1433 next = buff->next;
1434 free (buff->base);
1438 /* Allocate permanent, unaligned storage of length LEN. */
1439 unsigned char *
1440 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1442 _cpp_buff *buff = pfile->u_buff;
1443 unsigned char *result = buff->cur;
1445 if (len > (size_t) (buff->limit - result))
1447 buff = _cpp_get_buff (pfile, len);
1448 buff->next = pfile->u_buff;
1449 pfile->u_buff = buff;
1450 result = buff->cur;
1453 buff->cur = result + len;
1454 return result;
1457 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1458 That buffer is used for growing allocations when saving macro
1459 replacement lists in a #define, and when parsing an answer to an
1460 assertion in #assert, #unassert or #if (and therefore possibly
1461 whilst expanding macros). It therefore must not be used by any
1462 code that they might call: specifically the lexer and the guts of
1463 the macro expander.
1465 All existing other uses clearly fit this restriction: storing
1466 registered pragmas during initialization. */
1467 unsigned char *
1468 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1470 _cpp_buff *buff = pfile->a_buff;
1471 unsigned char *result = buff->cur;
1473 if (len > (size_t) (buff->limit - result))
1475 buff = _cpp_get_buff (pfile, len);
1476 buff->next = pfile->a_buff;
1477 pfile->a_buff = buff;
1478 result = buff->cur;
1481 buff->cur = result + len;
1482 return result;