* config/bfin/bfin.md (doloop_end): FAIL if counter reg isn't SImode.
[official-gcc.git] / libcpp / lex.c
blob296b39998115953edddd760c26c164c1c0cd15f7
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* How the spelling of a token type is obtained.  One of these values is
   stored per token type in the token_spellings table below.  */
27 enum spell_type
29 SPELL_OPERATOR = 0,
30 SPELL_IDENT,
31 SPELL_LITERAL,
32 SPELL_NONE
/* One entry of the token spelling table: the spelling category of a
   token type together with a string naming it.  */
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
/* Spellings of the digraph tokens.  */
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
/* token_spellings is generated from TTYPE_TABLE: each OP (e, s) entry
   becomes a SPELL_OPERATOR entry named by the string S, and each
   TK (e, s) entry gets category SPELL_<s> and the stringified
   enumerator E as its name.  OP and TK are only defined while the
   table is being expanded.  */
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
/* Look up the spelling category / name of TOKEN through its type.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of file-local (static) helpers, so that they can
   be referenced before their definitions.  */
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
64 static _cpp_buff *new_buff (size_t);
67 /* Utility routine:
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
72 cpp_ideq (const cpp_token *token, const char *string)
74 if (token->type != CPP_NAME)
75 return 0;
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 if (buffer->notes_used == buffer->notes_cap)
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
/* Prepare the next logical line of PFILE's current buffer, in place.
   Starting at buffer->next_line, backslash-newline sequences are
   spliced out, trigraphs are replaced when the trigraphs option is
   set, and the resulting line is terminated with '\n'.  A line note is
   recorded for every escaped newline and every trigraph encountered,
   followed by one sentinel note.  On return buffer->cur and
   buffer->line_base point at the start of the line and
   buffer->next_line just past the consumed input.  For buffers marked
   from_stage3 only the end of the line is located.  */
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
100 _cpp_clean_line (cpp_reader *pfile)
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
106 buffer = pfile->buffer;
/* Start a fresh set of notes for this line.  */
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
/* S starts one byte before the line because every read below is
   pre-incremented.  */
110 s = buffer->next_line - 1;
112 if (!buffer->from_stage3)
/* Most recent backslash seen on this line during the fast scan, if
   any; used to recognize an escaped newline.  */
114 const uchar *pbackslash = NULL;
116 /* Short circuit for the common case of an un-escaped line with
117 no trigraphs. The primary win here is by not writing any
118 data back to memory until we have to. */
119 for (;;)
121 c = *++s;
122 if (__builtin_expect (c == '\n', false)
123 || __builtin_expect (c == '\r', false))
/* D marks where the terminating '\n' will be stored.  */
125 d = (uchar *) s;
127 if (__builtin_expect (s == buffer->rlimit, false))
128 goto done;
130 /* DOS line ending? */
131 if (__builtin_expect (c == '\r', false)
132 && s[1] == '\n')
134 s++;
135 if (s == buffer->rlimit)
136 goto done;
/* No backslash on the line, so it cannot end in an escaped newline.  */
139 if (__builtin_expect (pbackslash == NULL, true))
140 goto done;
142 /* Check for escaped newline. */
143 p = d;
144 while (is_nvspace (p[-1]))
145 p--;
146 if (p - 1 != pbackslash)
147 goto done;
149 /* Have an escaped newline; process it and proceed to
150 the slow path. */
/* The note type records whether whitespace separated the backslash
   from the newline (' ') or not ('\\').  */
151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
/* The slow loop pre-increments D before storing, so backing up to
   P - 2 makes its first store overwrite the backslash.  */
152 d = p - 2;
153 buffer->next_line = p - 1;
154 break;
156 if (__builtin_expect (c == '\\', false))
157 pbackslash = s;
158 else if (__builtin_expect (c == '?', false)
159 && __builtin_expect (s[1] == '?', false)
160 && _cpp_trigraph_map[s[2]])
162 /* Have a trigraph. We may or may not have to convert
163 it. Add a line note regardless, for -Wtrigraphs. */
164 add_line_note (buffer, s, s[2]);
165 if (CPP_OPTION (pfile, trigraphs))
167 /* We do, and that means we have to switch to the
168 slow path. */
/* Store the replacement over the first '?' and skip the other two
   bytes of the trigraph.  */
169 d = (uchar *) s;
170 *d = _cpp_trigraph_map[s[2]];
171 s += 2;
172 break;
/* Slow path: copy bytes from S down to D, dropping escaped newlines
   and converting trigraphs as they are met.  */
178 for (;;)
180 c = *++s;
181 *++d = c;
183 if (c == '\n' || c == '\r')
185 /* Handle DOS line endings. */
186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 s++;
188 if (s == buffer->rlimit)
189 break;
191 /* Escaped? */
/* buffer->next_line was moved to the last splice point above, which
   bounds the backward scan over trailing whitespace.  */
192 p = d;
193 while (p != buffer->next_line && is_nvspace (p[-1]))
194 p--;
195 if (p == buffer->next_line || p[-1] != '\\')
196 break;
198 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199 d = p - 2;
200 buffer->next_line = p - 1;
202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
204 /* Add a note regardless, for the benefit of -Wtrigraphs. */
205 add_line_note (buffer, d, s[2]);
206 if (CPP_OPTION (pfile, trigraphs))
208 *d = _cpp_trigraph_map[s[2]];
209 s += 2;
/* from_stage3 buffer: no splicing or trigraph handling, just find the
   end of the line.  */
214 else
217 s++;
218 while (*s != '\n' && *s != '\r');
219 d = (uchar *) s;
221 /* Handle DOS line endings. */
222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 s++;
226 done:
/* Terminate the cleaned line; S is the last input byte consumed.  */
227 *d = '\n';
228 /* A sentinel note that should never be processed. */
229 add_line_note (buffer, d + 1, '\n');
230 buffer->next_line = s + 1;
233 /* Return true if the trigraph indicated by NOTE should be warned
234 about in a comment. */
235 static bool
236 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
238 const uchar *p;
240 /* Within comments we don't warn about trigraphs, unless the
241 trigraph forms an escaped newline, as that may change
242 behavior. */
243 if (note->type != '/')
244 return false;
246 /* If -trigraphs, then this was an escaped newline iff the next note
247 is coincident. */
248 if (CPP_OPTION (pfile, trigraphs))
249 return note[1].pos == note->pos;
251 /* Otherwise, see if this forms an escaped newline. */
252 p = note->pos + 3;
253 while (is_nvspace (*p))
254 p++;
256 /* There might have been escaped newlines between the trigraph and the
257 newline we found. Hence the position test. */
258 return (*p == '\n' && p < note[1].pos);
261 /* Process the notes created by add_line_note as far as the current
262 location. */
263 void
264 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
266 cpp_buffer *buffer = pfile->buffer;
268 for (;;)
270 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271 unsigned int col;
273 if (note->pos > buffer->cur)
274 break;
276 buffer->cur_note++;
277 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
279 if (note->type == '\\' || note->type == ' ')
281 if (note->type == ' ' && !in_comment)
282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283 "backslash and newline separated by space");
285 if (buffer->next_line > buffer->rlimit)
287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288 "backslash-newline at end of file");
289 /* Prevent "no newline at end of file" warning. */
290 buffer->next_line = buffer->rlimit;
293 buffer->line_base = note->pos;
294 CPP_INCREMENT_LINE (pfile, 0);
296 else if (_cpp_trigraph_map[note->type])
298 if (CPP_OPTION (pfile, warn_trigraphs)
299 && (!in_comment || warn_in_comment (pfile, note)))
301 if (CPP_OPTION (pfile, trigraphs))
302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303 "trigraph ??%c converted to %c",
304 note->type,
305 (int) _cpp_trigraph_map[note->type]);
306 else
308 cpp_error_with_line
309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310 "trigraph ??%c ignored, use -trigraphs to enable",
311 note->type);
315 else
316 abort ();
320 /* Skip a C-style block comment. We find the end of the comment by
321 seeing if an asterisk is before every '/' we encounter. Returns
322 nonzero if comment terminated by EOF, zero otherwise.
324 Buffer->cur points to the initial asterisk of the comment. */
325 bool
326 _cpp_skip_block_comment (cpp_reader *pfile)
328 cpp_buffer *buffer = pfile->buffer;
329 const uchar *cur = buffer->cur;
330 uchar c;
/* Step over the opening asterisk.  A '/' directly behind it is skipped
   as well, so that it is not paired with the opening asterisk and
   mistaken for the end of the comment.  */
332 cur++;
333 if (*cur == '/')
334 cur++;
336 for (;;)
338 /* People like decorating comments with '*', so check for '/'
339 instead for efficiency. */
340 c = *cur++;
342 if (c == '/')
/* CUR is already past the '/', so cur[-2] is the byte before it.  */
344 if (cur[-2] == '*')
345 break;
347 /* Warn about potential nested comments, but not if the '/'
348 comes immediately before the true comment delimiter.
349 Don't bother to get it right across escaped newlines. */
350 if (CPP_OPTION (pfile, warn_comments)
351 && cur[0] == '*' && cur[1] != '/')
/* buffer->cur is updated so that CPP_BUF_COL reports the column of the
   nested opener.  */
353 buffer->cur = cur;
354 cpp_error_with_line (pfile, CPP_DL_WARNING,
355 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356 "\"/*\" within comment");
359 else if (c == '\n')
/* End of the current logical line: handle its pending line notes, then
   either report EOF (no input left) or clean the next line and keep
   scanning from its start.  */
361 unsigned int cols;
362 buffer->cur = cur - 1;
363 _cpp_process_line_notes (pfile, true);
364 if (buffer->next_line >= buffer->rlimit)
365 return true;
366 _cpp_clean_line (pfile);
368 cols = buffer->next_line - buffer->line_base;
369 CPP_INCREMENT_LINE (pfile, cols);
371 cur = buffer->cur;
/* Comment closed normally: publish the position just past it and
   process the notes up to there.  */
375 buffer->cur = cur;
376 _cpp_process_line_notes (pfile, true);
377 return false;
380 /* Skip a C++ line comment, leaving buffer->cur pointing to the
381 terminating newline. Handles escaped newlines. Returns nonzero
382 if a multiline comment. */
383 static int
384 skip_line_comment (cpp_reader *pfile)
386 cpp_buffer *buffer = pfile->buffer;
387 unsigned int orig_line = pfile->line_table->highest_line;
389 while (*buffer->cur != '\n')
390 buffer->cur++;
392 _cpp_process_line_notes (pfile, true);
393 return orig_line != pfile->line_table->highest_line;
396 /* Skips whitespace, saving the next non-whitespace character. */
397 static void
398 skip_whitespace (cpp_reader *pfile, cppchar_t c)
400 cpp_buffer *buffer = pfile->buffer;
401 bool saw_NUL = false;
405 /* Horizontal space always OK. */
406 if (c == ' ' || c == '\t')
408 /* Just \f \v or \0 left. */
409 else if (c == '\0')
410 saw_NUL = true;
411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
417 c = *buffer->cur++;
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c));
422 if (saw_NUL)
423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
425 buffer->cur--;
428 /* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430 static int
431 name_p (cpp_reader *pfile, const cpp_string *string)
433 unsigned int i;
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
439 return 1;
442 /* After parsing an identifier or other sequence, produce a warning about
443 sequences not in NFC/NFKC. */
444 static void
445 warn_about_normalization (cpp_reader *pfile,
446 const cpp_token *token,
447 const struct normalize_state *s)
449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450 && !pfile->state.skipping)
452 /* Make sure that the token is printed using UCNs, even
453 if we'd otherwise happily print UTF-8. */
454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455 size_t sz;
457 sz = cpp_spell_token (pfile, token, buf, false) - buf;
458 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460 "`%.*s' is not in NFKC", (int) sz, buf);
461 else
462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463 "`%.*s' is not in NFC", (int) sz, buf);
467 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
468 an identifier. FIRST is TRUE if this starts an identifier. */
469 static bool
470 forms_identifier_p (cpp_reader *pfile, int first,
471 struct normalize_state *state)
473 cpp_buffer *buffer = pfile->buffer;
475 if (*buffer->cur == '$')
477 if (!CPP_OPTION (pfile, dollars_in_ident))
478 return false;
480 buffer->cur++;
481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
483 CPP_OPTION (pfile, warn_dollars) = 0;
484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
487 return true;
490 /* Is this a syntactically valid UCN? */
491 if (CPP_OPTION (pfile, extended_identifiers)
492 && *buffer->cur == '\\'
493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
495 buffer->cur += 2;
496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 state))
498 return true;
499 buffer->cur -= 2;
502 return false;
505 /* Lex an identifier starting at BUFFER->CUR - 1. */
506 static cpp_hashnode *
507 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 struct normalize_state *nst)
510 cpp_hashnode *result;
511 const uchar *cur;
512 unsigned int len;
513 unsigned int hash = HT_HASHSTEP (0, *base);
515 cur = pfile->buffer->cur;
516 if (! starts_ucn)
517 while (ISIDNUM (*cur))
519 hash = HT_HASHSTEP (hash, *cur);
520 cur++;
522 pfile->buffer->cur = cur;
523 if (starts_ucn || forms_identifier_p (pfile, false, nst))
525 /* Slower version for identifiers containing UCNs (or $). */
526 do {
527 while (ISIDNUM (*pfile->buffer->cur))
529 pfile->buffer->cur++;
530 NORMALIZE_STATE_UPDATE_IDNUM (nst);
532 } while (forms_identifier_p (pfile, false, nst));
533 result = _cpp_interpret_identifier (pfile, base,
534 pfile->buffer->cur - base);
536 else
538 len = cur - base;
539 hash = HT_HASHFINISH (hash, len);
541 result = (cpp_hashnode *)
542 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
545 /* Rarely, identifiers require diagnostics when lexed. */
546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 && !pfile->state.skipping, 0))
549 /* It is allowed to poison the same identifier twice. */
550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552 NODE_NAME (result));
554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 replacement list of a variadic macro. */
556 if (result == pfile->spec_nodes.n__VA_ARGS__
557 && !pfile->state.va_args_ok)
558 cpp_error (pfile, CPP_DL_PEDWARN,
559 "__VA_ARGS__ can only appear in the expansion"
560 " of a C99 variadic macro");
563 return result;
566 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
567 static void
568 lex_number (cpp_reader *pfile, cpp_string *number,
569 struct normalize_state *nst)
571 const uchar *cur;
572 const uchar *base;
573 uchar *dest;
575 base = pfile->buffer->cur - 1;
578 cur = pfile->buffer->cur;
580 /* N.B. ISIDNUM does not include $. */
581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
583 cur++;
584 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 pfile->buffer->cur = cur;
589 while (forms_identifier_p (pfile, false, nst));
591 number->len = cur - base;
592 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593 memcpy (dest, base, number->len);
594 dest[number->len] = '\0';
595 number->text = dest;
598 /* Create a token of type TYPE with a literal spelling. */
599 static void
600 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 unsigned int len, enum cpp_ttype type)
603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
605 memcpy (dest, base, len);
606 dest[len] = '\0';
607 token->type = type;
608 token->val.str.len = len;
609 token->val.str.text = dest;
612 /* Lexes a string, character constant, or angle-bracketed header file
613 name. The stored string contains the spelling, including opening
614 quote and leading any leading 'L'. It returns the type of the
615 literal, or CPP_OTHER if it was not properly terminated.
617 The spelling is NUL-terminated, but it is not guaranteed that this
618 is the first NUL since embedded NULs are preserved. */
619 static void
620 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
622 bool saw_NUL = false;
623 const uchar *cur;
624 cppchar_t terminator;
625 enum cpp_ttype type;
627 cur = base;
628 terminator = *cur++;
629 if (terminator == 'L')
630 terminator = *cur++;
631 if (terminator == '\"')
632 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
633 else if (terminator == '\'')
634 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
635 else
636 terminator = '>', type = CPP_HEADER_NAME;
638 for (;;)
640 cppchar_t c = *cur++;
642 /* In #include-style directives, terminators are not escapable. */
643 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
644 cur++;
645 else if (c == terminator)
646 break;
647 else if (c == '\n')
649 cur--;
650 type = CPP_OTHER;
651 break;
653 else if (c == '\0')
654 saw_NUL = true;
657 if (saw_NUL && !pfile->state.skipping)
658 cpp_error (pfile, CPP_DL_WARNING,
659 "null character(s) preserved in literal");
661 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
662 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
663 (int) terminator);
665 pfile->buffer->cur = cur;
666 create_literal (pfile, token, base, cur - base, type);
669 /* The stored comment includes the comment start and any terminator. */
670 static void
671 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
672 cppchar_t type)
674 unsigned char *buffer;
675 unsigned int len, clen;
677 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
679 /* C++ comments probably (not definitely) have moved past a new
680 line, which we don't want to save in the comment. */
681 if (is_vspace (pfile->buffer->cur[-1]))
682 len--;
684 /* If we are currently in a directive, then we need to store all
685 C++ comments as C comments internally, and so we need to
686 allocate a little extra space in that case.
688 Note that the only time we encounter a directive here is
689 when we are saving comments in a "#define". */
690 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
692 buffer = _cpp_unaligned_alloc (pfile, clen);
694 token->type = CPP_COMMENT;
695 token->val.str.len = clen;
696 token->val.str.text = buffer;
698 buffer[0] = '/';
699 memcpy (buffer + 1, from, len - 1);
701 /* Finish conversion to a C comment, if necessary. */
702 if (pfile->state.in_directive && type == '/')
704 buffer[1] = '*';
705 buffer[clen - 2] = '*';
706 buffer[clen - 1] = '/';
710 /* Allocate COUNT tokens for RUN. */
711 void
712 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
714 run->base = XNEWVEC (cpp_token, count);
715 run->limit = run->base + count;
716 run->next = NULL;
719 /* Returns the next tokenrun, or creates one if there is none. */
720 static tokenrun *
721 next_tokenrun (tokenrun *run)
723 if (run->next == NULL)
725 run->next = XNEW (tokenrun);
726 run->next->prev = run;
727 _cpp_init_tokenrun (run->next, 250);
730 return run->next;
733 /* Allocate a single token that is invalidated at the same time as the
734 rest of the tokens on the line. Has its line and col set to the
735 same as the last lexed token, so that diagnostics appear in the
736 right place. */
737 cpp_token *
738 _cpp_temp_token (cpp_reader *pfile)
740 cpp_token *old, *result;
742 old = pfile->cur_token - 1;
743 if (pfile->cur_token == pfile->cur_run->limit)
745 pfile->cur_run = next_tokenrun (pfile->cur_run);
746 pfile->cur_token = pfile->cur_run->base;
749 result = pfile->cur_token++;
750 result->src_loc = old->src_loc;
751 return result;
754 /* Lex a token into RESULT (external interface). Takes care of issues
755 like directive handling, token lookahead, multiple include
756 optimization and skipping. */
/* Returns the next token: either a slot in the current token run or
   &pfile->directive_result.  Loops while tokens are being skipped
   outside of directives, until a token is accepted or CPP_EOF is
   reached.  */
757 const cpp_token *
758 _cpp_lex_token (cpp_reader *pfile)
760 cpp_token *result;
762 for (;;)
/* Move on to the next token run when the current one is full.  */
764 if (pfile->cur_token == pfile->cur_run->limit)
766 pfile->cur_run = next_tokenrun (pfile->cur_run);
767 pfile->cur_token = pfile->cur_run->base;
769 /* We assume that the current token is somewhere in the current
770 run. */
771 if (pfile->cur_token < pfile->cur_run->base
772 || pfile->cur_token >= pfile->cur_run->limit)
773 abort ();
/* A pending lookahead count means the next token already sits in the
   run; hand it out instead of lexing a new one.  */
775 if (pfile->lookaheads)
777 pfile->lookaheads--;
778 result = pfile->cur_token++;
780 else
781 result = _cpp_lex_direct (pfile);
/* Tokens flagged BOL are candidates for directive handling.  */
783 if (result->flags & BOL)
785 /* Is this a directive. If _cpp_handle_directive returns
786 false, it is an assembler #. */
787 if (result->type == CPP_HASH
788 /* 6.10.3 p 11: Directives in a list of macro arguments
789 gives undefined behavior. This implementation
790 handles the directive as normal. */
791 && pfile->state.parsing_args != 1)
793 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
/* A directive whose result is only padding yields no token for the
   caller; lex again.  Otherwise the directive's result is returned.  */
795 if (pfile->directive_result.type == CPP_PADDING)
796 continue;
797 result = &pfile->directive_result;
800 else if (pfile->state.in_deferred_pragma)
801 result = &pfile->directive_result;
/* Notify the client of the line change, unless skipping.  */
803 if (pfile->cb.line_change && !pfile->state.skipping)
804 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
807 /* We don't skip tokens in directives. */
808 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
809 break;
811 /* Outside a directive, invalidate controlling macros. At file
812 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
813 get here and MI optimization works. */
814 pfile->mi_valid = false;
/* While skipping, only CPP_EOF is passed on to the caller.  */
816 if (!pfile->state.skipping || result->type == CPP_EOF)
817 break;
820 return result;
823 /* Returns true if a fresh line has been loaded. */
824 bool
825 _cpp_get_fresh_line (cpp_reader *pfile)
827 int return_at_eof;
829 /* We can't get a new line until we leave the current directive. */
830 if (pfile->state.in_directive)
831 return false;
833 for (;;)
835 cpp_buffer *buffer = pfile->buffer;
837 if (!buffer->need_line)
838 return true;
840 if (buffer->next_line < buffer->rlimit)
842 _cpp_clean_line (pfile);
843 return true;
846 /* First, get out of parsing arguments state. */
847 if (pfile->state.parsing_args)
848 return false;
850 /* End of buffer. Non-empty files should end in a newline. */
851 if (buffer->buf != buffer->rlimit
852 && buffer->next_line > buffer->rlimit
853 && !buffer->from_stage3)
855 /* Only warn once. */
856 buffer->next_line = buffer->rlimit;
857 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
858 CPP_BUF_COLUMN (buffer, buffer->cur),
859 "no newline at end of file");
862 return_at_eof = buffer->return_at_eof;
863 _cpp_pop_buffer (pfile);
864 if (pfile->buffer == NULL || return_at_eof)
865 return false;
/* Set result->type depending on the character at buffer->cur: if it
   equals CHAR, consume it and use THEN_TYPE; otherwise leave the
   buffer position alone and use ELSE_TYPE.  Variables named `result'
   and `buffer' must be in scope where the macro is used.  The
   arguments are parenthesized so that compound expressions expand
   safely.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
878 /* Lex a token into pfile->cur_token, which is also incremented, to
879 get diagnostics pointing to the correct location.
881 Does not handle issues such as token lookahead, multiple-include
882 optimization, directives, skipping etc. This function is only
883 suitable for use by _cpp_lex_token, and in special cases like
884 lex_expansion_token which doesn't care for any of these issues.
886 When meeting a newline, returns CPP_EOF if parsing a directive,
887 otherwise returns to the start of the token buffer if permissible.
888 Returns the location of the lexed token. */
889 cpp_token *
890 _cpp_lex_direct (cpp_reader *pfile)
892 cppchar_t c;
893 cpp_buffer *buffer;
894 const unsigned char *comment_start;
/* Claim the token slot up front.  RESULT may be redirected to the
   start of the base run below when tokens need not be kept.  */
895 cpp_token *result = pfile->cur_token++;
897 fresh_line:
898 result->flags = 0;
899 buffer = pfile->buffer;
900 if (buffer->need_line)
/* The end of a line ends a deferred pragma: return CPP_PRAGMA_EOL
   and leave the deferred-pragma state.  */
902 if (pfile->state.in_deferred_pragma)
904 result->type = CPP_PRAGMA_EOL;
905 pfile->state.in_deferred_pragma = false;
906 if (!pfile->state.pragma_allow_expansion)
907 pfile->state.prevent_expansion--;
908 return result;
/* No further line could be obtained: return CPP_EOF.  */
910 if (!_cpp_get_fresh_line (pfile))
912 result->type = CPP_EOF;
913 if (!pfile->state.in_directive)
915 /* Tell the compiler the line number of the EOF token. */
916 result->src_loc = pfile->line_table->highest_line;
917 result->flags = BOL;
919 return result;
/* Unless tokens must be kept, reuse the token storage from the start
   of the base run for the new line.  */
921 if (!pfile->keep_tokens)
923 pfile->cur_run = &pfile->base_run;
924 result = pfile->base_run.base;
925 pfile->cur_token = result + 1;
927 result->flags = BOL;
928 if (pfile->state.parsing_args == 2)
929 result->flags |= PREV_WHITE;
/* _cpp_get_fresh_line may have popped a buffer, so reload it.  */
931 buffer = pfile->buffer;
932 update_tokens_line:
933 result->src_loc = pfile->line_table->highest_line;
935 skipped_white:
/* Process any line notes that the lexer position has reached.  */
936 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
937 && !pfile->overlaid_buffer)
939 _cpp_process_line_notes (pfile, false);
940 result->src_loc = pfile->line_table->highest_line;
942 c = *buffer->cur++;
944 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
945 CPP_BUF_COLUMN (buffer, buffer->cur));
/* Dispatch on the first character of the token.  */
947 switch (c)
949 case ' ': case '\t': case '\f': case '\v': case '\0':
950 result->flags |= PREV_WHITE;
951 skip_whitespace (pfile, c);
952 goto skipped_white;
954 case '\n':
/* End of the logical line: request a new one and start over.  */
955 if (buffer->cur < buffer->rlimit)
956 CPP_INCREMENT_LINE (pfile, 0);
957 buffer->need_line = true;
958 goto fresh_line;
960 case '0': case '1': case '2': case '3': case '4':
961 case '5': case '6': case '7': case '8': case '9':
963 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
964 result->type = CPP_NUMBER;
965 lex_number (pfile, &result->val.str, &nst);
966 warn_about_normalization (pfile, result, &nst);
967 break;
970 case 'L':
971 /* 'L' may introduce wide characters or strings. */
972 if (*buffer->cur == '\'' || *buffer->cur == '"')
974 lex_string (pfile, result, buffer->cur - 1);
975 break;
977 /* Fall through. */
979 case '_':
980 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
981 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
982 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
983 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
984 case 'y': case 'z':
985 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
986 case 'G': case 'H': case 'I': case 'J': case 'K':
987 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
988 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
989 case 'Y': case 'Z':
990 result->type = CPP_NAME;
992 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
993 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
994 &nst);
995 warn_about_normalization (pfile, result, &nst);
998 /* Convert named operators to their proper types. */
999 if (result->val.node->flags & NODE_OPERATOR)
1001 result->flags |= NAMED_OP;
1002 result->type = (enum cpp_ttype) result->val.node->directive_index;
1004 break;
1006 case '\'':
1007 case '"':
1008 lex_string (pfile, result, buffer->cur - 1);
1009 break;
1011 case '/':
1012 /* A potential block or line comment. */
1013 comment_start = buffer->cur;
1014 c = *buffer->cur;
1016 if (c == '*')
1018 if (_cpp_skip_block_comment (pfile))
1019 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1021 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1022 || cpp_in_system_header (pfile)))
1024 /* Warn about comments only if pedantically GNUC89, and not
1025 in system headers. */
1026 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1027 && ! buffer->warned_cplusplus_comments)
1029 cpp_error (pfile, CPP_DL_PEDWARN,
1030 "C++ style comments are not allowed in ISO C90");
1031 cpp_error (pfile, CPP_DL_PEDWARN,
1032 "(this will be reported only once per input file)");
1033 buffer->warned_cplusplus_comments = 1;
1036 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1037 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1039 else if (c == '=')
1041 buffer->cur++;
1042 result->type = CPP_DIV_EQ;
1043 break;
1045 else
1047 result->type = CPP_DIV;
1048 break;
/* Only reached after a comment has been skipped.  Unless comments are
   being saved, it counts as whitespace and lexing resumes with the
   next token.  */
1051 if (!pfile->state.save_comments)
1053 result->flags |= PREV_WHITE;
1054 goto update_tokens_line;
1057 /* Save the comment as a token in its own right. */
1058 save_comment (pfile, result, comment_start, c);
1059 break;
1061 case '<':
/* When angled header names are expected, '<' opens a header name.  */
1062 if (pfile->state.angled_headers)
1064 lex_string (pfile, result, buffer->cur - 1);
1065 break;
1068 result->type = CPP_LESS;
1069 if (*buffer->cur == '=')
1070 buffer->cur++, result->type = CPP_LESS_EQ;
1071 else if (*buffer->cur == '<')
1073 buffer->cur++;
1074 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1076 else if (CPP_OPTION (pfile, digraphs))
1078 if (*buffer->cur == ':')
1080 buffer->cur++;
1081 result->flags |= DIGRAPH;
1082 result->type = CPP_OPEN_SQUARE;
1084 else if (*buffer->cur == '%')
1086 buffer->cur++;
1087 result->flags |= DIGRAPH;
1088 result->type = CPP_OPEN_BRACE;
1091 break;
1093 case '>':
1094 result->type = CPP_GREATER;
1095 if (*buffer->cur == '=')
1096 buffer->cur++, result->type = CPP_GREATER_EQ;
1097 else if (*buffer->cur == '>')
1099 buffer->cur++;
1100 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1102 break;
1104 case '%':
1105 result->type = CPP_MOD;
1106 if (*buffer->cur == '=')
1107 buffer->cur++, result->type = CPP_MOD_EQ;
1108 else if (CPP_OPTION (pfile, digraphs))
1110 if (*buffer->cur == ':')
1112 buffer->cur++;
1113 result->flags |= DIGRAPH;
1114 result->type = CPP_HASH;
1115 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1116 buffer->cur += 2, result->type = CPP_PASTE;
1118 else if (*buffer->cur == '>')
1120 buffer->cur++;
1121 result->flags |= DIGRAPH;
1122 result->type = CPP_CLOSE_BRACE;
1125 break;
1127 case '.':
1128 result->type = CPP_DOT;
/* A '.' followed by a digit starts a number.  */
1129 if (ISDIGIT (*buffer->cur))
1131 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1132 result->type = CPP_NUMBER;
1133 lex_number (pfile, &result->val.str, &nst);
1134 warn_about_normalization (pfile, result, &nst);
1136 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1137 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1138 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1139 buffer->cur++, result->type = CPP_DOT_STAR;
1140 break;
1142 case '+':
1143 result->type = CPP_PLUS;
1144 if (*buffer->cur == '+')
1145 buffer->cur++, result->type = CPP_PLUS_PLUS;
1146 else if (*buffer->cur == '=')
1147 buffer->cur++, result->type = CPP_PLUS_EQ;
1148 break;
1150 case '-':
1151 result->type = CPP_MINUS;
1152 if (*buffer->cur == '>')
1154 buffer->cur++;
1155 result->type = CPP_DEREF;
1156 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1157 buffer->cur++, result->type = CPP_DEREF_STAR;
1159 else if (*buffer->cur == '-')
1160 buffer->cur++, result->type = CPP_MINUS_MINUS;
1161 else if (*buffer->cur == '=')
1162 buffer->cur++, result->type = CPP_MINUS_EQ;
1163 break;
1165 case '&':
1166 result->type = CPP_AND;
1167 if (*buffer->cur == '&')
1168 buffer->cur++, result->type = CPP_AND_AND;
1169 else if (*buffer->cur == '=')
1170 buffer->cur++, result->type = CPP_AND_EQ;
1171 break;
1173 case '|':
1174 result->type = CPP_OR;
1175 if (*buffer->cur == '|')
1176 buffer->cur++, result->type = CPP_OR_OR;
1177 else if (*buffer->cur == '=')
1178 buffer->cur++, result->type = CPP_OR_EQ;
1179 break;
1181 case ':':
1182 result->type = CPP_COLON;
1183 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1184 buffer->cur++, result->type = CPP_SCOPE;
1185 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1187 buffer->cur++;
1188 result->flags |= DIGRAPH;
1189 result->type = CPP_CLOSE_SQUARE;
1191 break;
1193 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1194 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1195 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1196 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1197 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1199 case '?': result->type = CPP_QUERY; break;
1200 case '~': result->type = CPP_COMPL; break;
1201 case ',': result->type = CPP_COMMA; break;
1202 case '(': result->type = CPP_OPEN_PAREN; break;
1203 case ')': result->type = CPP_CLOSE_PAREN; break;
1204 case '[': result->type = CPP_OPEN_SQUARE; break;
1205 case ']': result->type = CPP_CLOSE_SQUARE; break;
1206 case '{': result->type = CPP_OPEN_BRACE; break;
1207 case '}': result->type = CPP_CLOSE_BRACE; break;
1208 case ';': result->type = CPP_SEMICOLON; break;
1210 /* @ is a punctuator in Objective-C. */
1211 case '@': result->type = CPP_ATSIGN; break;
1213 case '$':
1214 case '\\':
/* Back up onto the character so that forms_identifier_p can examine
   it.  If it does not start an identifier, the character is consumed
   again and control falls through to the default case.  */
1216 const uchar *base = --buffer->cur;
1217 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1219 if (forms_identifier_p (pfile, true, &nst))
1221 result->type = CPP_NAME;
1222 result->val.node = lex_identifier (pfile, base, true, &nst);
1223 warn_about_normalization (pfile, result, &nst);
1224 break;
1226 buffer->cur++;
1229 default:
/* Any other character becomes a one-character CPP_OTHER token.  */
1230 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1231 break;
1234 return result;
1237 /* An upper bound on the number of bytes needed to spell TOKEN.
1238 Does not include preceding whitespace. */
1239 unsigned int
1240 cpp_token_len (const cpp_token *token)
1242 unsigned int len;
1244 switch (TOKEN_SPELL (token))
1246 default: len = 4; break;
1247 case SPELL_LITERAL: len = token->val.str.len; break;
1248 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1251 return len;
/* Decode the UTF-8 sequence that starts at NAME and store the matching
   \U escape at BUFFER.  Exactly 10 bytes are stored: a backslash, 'U'
   and eight lower-case hexadecimal digits; no terminating NUL is
   written.  The return value is the number of bytes of NAME that made
   up the sequence.  Aborts if a trailing byte is not of the form
   10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *out = buffer;
  unsigned long code_point;
  unsigned lead_bits;
  int seq_len = 0;
  int consumed;
  int shift;

  /* Every leading 1 bit of the first byte stands for one byte of the
     sequence.  */
  lead_bits = *src;
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* What is left of the first byte after the length prefix is the
     most significant part of the code point.  */
  code_point = *src & (0x7F >> seq_len);

  /* Each trailing byte contributes its low six bits.  */
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      src++;
      code_point = (code_point << 6) | (*src & 0x3F);

      /* Ill-formed UTF-8: the top two bits must be 10.  */
      if ((*src & 0xC0) != 0x80)
        abort ();
    }

  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1289 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1290 already contain the enough space to hold the token's spelling.
1291 Returns a pointer to the character after the last character written.
1292 FORSTRING is true if this is to be the spelling after translation
1293 phase 1 (this is different for UCNs).
1294 FIXME: Would be nice if we didn't need the PFILE argument. */
1295 unsigned char *
1296 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1297 unsigned char *buffer, bool forstring)
1299 switch (TOKEN_SPELL (token))
1301 case SPELL_OPERATOR:
1303 const unsigned char *spelling;
1304 unsigned char c;
1306 if (token->flags & DIGRAPH)
1307 spelling
1308 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1309 else if (token->flags & NAMED_OP)
1310 goto spell_ident;
1311 else
1312 spelling = TOKEN_NAME (token);
1314 while ((c = *spelling++) != '\0')
1315 *buffer++ = c;
1317 break;
1319 spell_ident:
1320 case SPELL_IDENT:
1321 if (forstring)
1323 memcpy (buffer, NODE_NAME (token->val.node),
1324 NODE_LEN (token->val.node));
1325 buffer += NODE_LEN (token->val.node);
1327 else
1329 size_t i;
1330 const unsigned char * name = NODE_NAME (token->val.node);
1332 for (i = 0; i < NODE_LEN (token->val.node); i++)
1333 if (name[i] & ~0x7F)
1335 i += utf8_to_ucn (buffer, name + i) - 1;
1336 buffer += 10;
1338 else
1339 *buffer++ = NODE_NAME (token->val.node)[i];
1341 break;
1343 case SPELL_LITERAL:
1344 memcpy (buffer, token->val.str.text, token->val.str.len);
1345 buffer += token->val.str.len;
1346 break;
1348 case SPELL_NONE:
1349 cpp_error (pfile, CPP_DL_ICE,
1350 "unspellable token %s", TOKEN_NAME (token));
1351 break;
1354 return buffer;
1357 /* Returns TOKEN spelt as a null-terminated string. The string is
1358 freed when the reader is destroyed. Useful for diagnostics. */
1359 unsigned char *
1360 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1362 unsigned int len = cpp_token_len (token) + 1;
1363 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1365 end = cpp_spell_token (pfile, token, start, false);
1366 end[0] = '\0';
1368 return start;
1371 /* Used by C front ends, which really should move to using
1372 cpp_token_as_text. */
1373 const char *
1374 cpp_type2name (enum cpp_ttype type)
1376 return (const char *) token_spellings[type].name;
1379 /* Writes the spelling of token to FP, without any preceding space.
1380 Separated from cpp_spell_token for efficiency - to avoid stdio
1381 double-buffering. */
1382 void
1383 cpp_output_token (const cpp_token *token, FILE *fp)
1385 switch (TOKEN_SPELL (token))
1387 case SPELL_OPERATOR:
1389 const unsigned char *spelling;
1390 int c;
1392 if (token->flags & DIGRAPH)
1393 spelling
1394 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1395 else if (token->flags & NAMED_OP)
1396 goto spell_ident;
1397 else
1398 spelling = TOKEN_NAME (token);
1400 c = *spelling;
1402 putc (c, fp);
1403 while ((c = *++spelling) != '\0');
1405 break;
1407 spell_ident:
1408 case SPELL_IDENT:
1410 size_t i;
1411 const unsigned char * name = NODE_NAME (token->val.node);
1413 for (i = 0; i < NODE_LEN (token->val.node); i++)
1414 if (name[i] & ~0x7F)
1416 unsigned char buffer[10];
1417 i += utf8_to_ucn (buffer, name + i) - 1;
1418 fwrite (buffer, 1, 10, fp);
1420 else
1421 fputc (NODE_NAME (token->val.node)[i], fp);
1423 break;
1425 case SPELL_LITERAL:
1426 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1427 break;
1429 case SPELL_NONE:
1430 /* An error, most probably. */
1431 break;
1435 /* Compare two tokens. */
1437 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1439 if (a->type == b->type && a->flags == b->flags)
1440 switch (TOKEN_SPELL (a))
1442 default: /* Keep compiler happy. */
1443 case SPELL_OPERATOR:
1444 return 1;
1445 case SPELL_NONE:
1446 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1447 case SPELL_IDENT:
1448 return a->val.node == b->val.node;
1449 case SPELL_LITERAL:
1450 return (a->val.str.len == b->val.str.len
1451 && !memcmp (a->val.str.text, b->val.str.text,
1452 a->val.str.len));
1455 return 0;
1458 /* Returns nonzero if a space should be inserted to avoid an
1459 accidental token paste for output. For simplicity, it is
1460 conservative, and occasionally advises a space where one is not
1461 needed, e.g. "." and ".2". */
1463 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1464 const cpp_token *token2)
1466 enum cpp_ttype a = token1->type, b = token2->type;
1467 cppchar_t c;
1469 if (token1->flags & NAMED_OP)
1470 a = CPP_NAME;
1471 if (token2->flags & NAMED_OP)
1472 b = CPP_NAME;
1474 c = EOF;
1475 if (token2->flags & DIGRAPH)
1476 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1477 else if (token_spellings[b].category == SPELL_OPERATOR)
1478 c = token_spellings[b].name[0];
1480 /* Quickly get everything that can paste with an '='. */
1481 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1482 return 1;
1484 switch (a)
1486 case CPP_GREATER: return c == '>';
1487 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1488 case CPP_PLUS: return c == '+';
1489 case CPP_MINUS: return c == '-' || c == '>';
1490 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1491 case CPP_MOD: return c == ':' || c == '>';
1492 case CPP_AND: return c == '&';
1493 case CPP_OR: return c == '|';
1494 case CPP_COLON: return c == ':' || c == '>';
1495 case CPP_DEREF: return c == '*';
1496 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1497 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1498 case CPP_NAME: return ((b == CPP_NUMBER
1499 && name_p (pfile, &token2->val.str))
1500 || b == CPP_NAME
1501 || b == CPP_CHAR || b == CPP_STRING); /* L */
1502 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1503 || c == '.' || c == '+' || c == '-');
1504 /* UCNs */
1505 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1506 && b == CPP_NAME)
1507 || (CPP_OPTION (pfile, objc)
1508 && token1->val.str.text[0] == '@'
1509 && (b == CPP_NAME || b == CPP_STRING)));
1510 default: break;
1513 return 0;
1516 /* Output all the remaining tokens on the current line, and a newline
1517 character, to FP. Leading whitespace is removed. If there are
1518 macros, special token padding is not performed. */
1519 void
1520 cpp_output_line (cpp_reader *pfile, FILE *fp)
1522 const cpp_token *token;
1524 token = cpp_get_token (pfile);
1525 while (token->type != CPP_EOF)
1527 cpp_output_token (token, fp);
1528 token = cpp_get_token (pfile);
1529 if (token->flags & PREV_WHITE)
1530 putc (' ', fp);
1533 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that an argument which is
   itself an expression cannot be regrouped by operator precedence.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1551 /* Create a new allocation buffer. Place the control block at the end
1552 of the buffer, so that buffer overflows will cause immediate chaos. */
1553 static _cpp_buff *
1554 new_buff (size_t len)
1556 _cpp_buff *result;
1557 unsigned char *base;
1559 if (len < MIN_BUFF_SIZE)
1560 len = MIN_BUFF_SIZE;
1561 len = CPP_ALIGN (len);
1563 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1564 result = (_cpp_buff *) (base + len);
1565 result->base = base;
1566 result->cur = base;
1567 result->limit = base + len;
1568 result->next = NULL;
1569 return result;
1572 /* Place a chain of unwanted allocation buffers on the free list. */
1573 void
1574 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1576 _cpp_buff *end = buff;
1578 while (end->next)
1579 end = end->next;
1580 end->next = pfile->free_buffs;
1581 pfile->free_buffs = buff;
1584 /* Return a free buffer of size at least MIN_SIZE. */
1585 _cpp_buff *
1586 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1588 _cpp_buff *result, **p;
1590 for (p = &pfile->free_buffs;; p = &(*p)->next)
1592 size_t size;
1594 if (*p == NULL)
1595 return new_buff (min_size);
1596 result = *p;
1597 size = result->limit - result->base;
1598 /* Return a buffer that's big enough, but don't waste one that's
1599 way too big. */
1600 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1601 break;
1604 *p = result->next;
1605 result->next = NULL;
1606 result->cur = result->base;
1607 return result;
1610 /* Creates a new buffer with enough space to hold the uncommitted
1611 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1612 the excess bytes to the new buffer. Chains the new buffer after
1613 BUFF, and returns the new buffer. */
1614 _cpp_buff *
1615 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1617 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1618 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1620 buff->next = new_buff;
1621 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1622 return new_buff;
1625 /* Creates a new buffer with enough space to hold the uncommitted
1626 remaining bytes of the buffer pointed to by BUFF, and at least
1627 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1628 Chains the new buffer before the buffer pointed to by BUFF, and
1629 updates the pointer to point to the new buffer. */
1630 void
1631 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1633 _cpp_buff *new_buff, *old_buff = *pbuff;
1634 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1636 new_buff = _cpp_get_buff (pfile, size);
1637 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1638 new_buff->next = old_buff;
1639 *pbuff = new_buff;
1642 /* Free a chain of buffers starting at BUFF. */
1643 void
1644 _cpp_free_buff (_cpp_buff *buff)
1646 _cpp_buff *next;
1648 for (; buff; buff = next)
1650 next = buff->next;
1651 free (buff->base);
1655 /* Allocate permanent, unaligned storage of length LEN. */
1656 unsigned char *
1657 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1659 _cpp_buff *buff = pfile->u_buff;
1660 unsigned char *result = buff->cur;
1662 if (len > (size_t) (buff->limit - result))
1664 buff = _cpp_get_buff (pfile, len);
1665 buff->next = pfile->u_buff;
1666 pfile->u_buff = buff;
1667 result = buff->cur;
1670 buff->cur = result + len;
1671 return result;
1674 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1675 That buffer is used for growing allocations when saving macro
1676 replacement lists in a #define, and when parsing an answer to an
1677 assertion in #assert, #unassert or #if (and therefore possibly
1678 whilst expanding macros). It therefore must not be used by any
1679 code that they might call: specifically the lexer and the guts of
1680 the macro expander.
1682 All existing other uses clearly fit this restriction: storing
1683 registered pragmas during initialization. */
1684 unsigned char *
1685 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1687 _cpp_buff *buff = pfile->a_buff;
1688 unsigned char *result = buff->cur;
1690 if (len > (size_t) (buff->limit - result))
1692 buff = _cpp_get_buff (pfile, len);
1693 buff->next = pfile->a_buff;
1694 pfile->a_buff = buff;
1695 result = buff->cur;
1698 buff->cur = result + len;
1699 return result;
1702 /* Say which field of TOK is in use. */
1704 enum cpp_token_fld_kind
1705 cpp_token_val_index (cpp_token *tok)
1707 switch (TOKEN_SPELL (tok))
1709 case SPELL_IDENT:
1710 return CPP_TOKEN_FLD_NODE;
1711 case SPELL_LITERAL:
1712 return CPP_TOKEN_FLD_STR;
1713 case SPELL_NONE:
1714 if (tok->type == CPP_MACRO_ARG)
1715 return CPP_TOKEN_FLD_ARG_NO;
1716 else if (tok->type == CPP_PADDING)
1717 return CPP_TOKEN_FLD_SOURCE;
1718 else if (tok->type == CPP_PRAGMA)
1719 return CPP_TOKEN_FLD_PRAGMA;
1720 /* else fall through */
1721 default:
1722 return CPP_TOKEN_FLD_NONE;