2006-08-06 Paolo Carlini <pcarlini@suse.de>
[official-gcc.git] / libcpp / lex.c
blobcae9b0376636b6c5e6066246eeb43dcef6b965ca
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* Category of a token's spelling, recorded for every token type in
   the token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
64 static _cpp_buff *new_buff (size_t);
67 /* Utility routine:
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
72 cpp_ideq (const cpp_token *token, const char *string)
74 if (token->type != CPP_NAME)
75 return 0;
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 if (buffer->notes_used == buffer->notes_cap)
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
100 _cpp_clean_line (cpp_reader *pfile)
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
112 if (!buffer->from_stage3)
114 /* Short circuit for the common case of an un-escaped line with
115 no trigraphs. The primary win here is by not writing any
116 data back to memory until we have to. */
117 for (;;)
119 c = *++s;
120 if (c == '\n' || c == '\r')
122 d = (uchar *) s;
124 if (s == buffer->rlimit)
125 goto done;
127 /* DOS line ending? */
128 if (c == '\r' && s[1] == '\n')
129 s++;
131 if (s == buffer->rlimit)
132 goto done;
134 /* check for escaped newline */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 goto done;
141 /* Have an escaped newline; process it and proceed to
142 the slow path. */
143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
146 break;
148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
150 /* Have a trigraph. We may or may not have to convert
151 it. Add a line note regardless, for -Wtrigraphs. */
152 add_line_note (buffer, s, s[2]);
153 if (CPP_OPTION (pfile, trigraphs))
155 /* We do, and that means we have to switch to the
156 slow path. */
157 d = (uchar *) s;
158 *d = _cpp_trigraph_map[s[2]];
159 s += 2;
160 break;
166 for (;;)
168 c = *++s;
169 *++d = c;
171 if (c == '\n' || c == '\r')
173 /* Handle DOS line endings. */
174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
176 if (s == buffer->rlimit)
177 break;
179 /* Escaped? */
180 p = d;
181 while (p != buffer->next_line && is_nvspace (p[-1]))
182 p--;
183 if (p == buffer->next_line || p[-1] != '\\')
184 break;
186 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
187 d = p - 2;
188 buffer->next_line = p - 1;
190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
192 /* Add a note regardless, for the benefit of -Wtrigraphs. */
193 add_line_note (buffer, d, s[2]);
194 if (CPP_OPTION (pfile, trigraphs))
196 *d = _cpp_trigraph_map[s[2]];
197 s += 2;
202 else
205 s++;
206 while (*s != '\n' && *s != '\r');
207 d = (uchar *) s;
209 /* Handle DOS line endings. */
210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 s++;
214 done:
215 *d = '\n';
216 /* A sentinel note that should never be processed. */
217 add_line_note (buffer, d + 1, '\n');
218 buffer->next_line = s + 1;
221 /* Return true if the trigraph indicated by NOTE should be warned
222 about in a comment. */
223 static bool
224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
226 const uchar *p;
228 /* Within comments we don't warn about trigraphs, unless the
229 trigraph forms an escaped newline, as that may change
230 behavior. */
231 if (note->type != '/')
232 return false;
234 /* If -trigraphs, then this was an escaped newline iff the next note
235 is coincident. */
236 if (CPP_OPTION (pfile, trigraphs))
237 return note[1].pos == note->pos;
239 /* Otherwise, see if this forms an escaped newline. */
240 p = note->pos + 3;
241 while (is_nvspace (*p))
242 p++;
244 /* There might have been escaped newlines between the trigraph and the
245 newline we found. Hence the position test. */
246 return (*p == '\n' && p < note[1].pos);
249 /* Process the notes created by add_line_note as far as the current
250 location. */
251 void
252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
254 cpp_buffer *buffer = pfile->buffer;
256 for (;;)
258 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259 unsigned int col;
261 if (note->pos > buffer->cur)
262 break;
264 buffer->cur_note++;
265 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
267 if (note->type == '\\' || note->type == ' ')
269 if (note->type == ' ' && !in_comment)
270 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
271 "backslash and newline separated by space");
273 if (buffer->next_line > buffer->rlimit)
275 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
276 "backslash-newline at end of file");
277 /* Prevent "no newline at end of file" warning. */
278 buffer->next_line = buffer->rlimit;
281 buffer->line_base = note->pos;
282 CPP_INCREMENT_LINE (pfile, 0);
284 else if (_cpp_trigraph_map[note->type])
286 if (CPP_OPTION (pfile, warn_trigraphs)
287 && (!in_comment || warn_in_comment (pfile, note)))
289 if (CPP_OPTION (pfile, trigraphs))
290 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
291 "trigraph ??%c converted to %c",
292 note->type,
293 (int) _cpp_trigraph_map[note->type]);
294 else
296 cpp_error_with_line
297 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
298 "trigraph ??%c ignored, use -trigraphs to enable",
299 note->type);
303 else
304 abort ();
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 nonzero if comment terminated by EOF, zero otherwise.
312 Buffer->cur points to the initial asterisk of the comment. */
313 bool
314 _cpp_skip_block_comment (cpp_reader *pfile)
316 cpp_buffer *buffer = pfile->buffer;
317 const uchar *cur = buffer->cur;
318 uchar c;
320 cur++;
321 if (*cur == '/')
322 cur++;
324 for (;;)
326 /* People like decorating comments with '*', so check for '/'
327 instead for efficiency. */
328 c = *cur++;
330 if (c == '/')
332 if (cur[-2] == '*')
333 break;
335 /* Warn about potential nested comments, but not if the '/'
336 comes immediately before the true comment delimiter.
337 Don't bother to get it right across escaped newlines. */
338 if (CPP_OPTION (pfile, warn_comments)
339 && cur[0] == '*' && cur[1] != '/')
341 buffer->cur = cur;
342 cpp_error_with_line (pfile, CPP_DL_WARNING,
343 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
344 "\"/*\" within comment");
347 else if (c == '\n')
349 unsigned int cols;
350 buffer->cur = cur - 1;
351 _cpp_process_line_notes (pfile, true);
352 if (buffer->next_line >= buffer->rlimit)
353 return true;
354 _cpp_clean_line (pfile);
356 cols = buffer->next_line - buffer->line_base;
357 CPP_INCREMENT_LINE (pfile, cols);
359 cur = buffer->cur;
363 buffer->cur = cur;
364 _cpp_process_line_notes (pfile, true);
365 return false;
368 /* Skip a C++ line comment, leaving buffer->cur pointing to the
369 terminating newline. Handles escaped newlines. Returns nonzero
370 if a multiline comment. */
371 static int
372 skip_line_comment (cpp_reader *pfile)
374 cpp_buffer *buffer = pfile->buffer;
375 unsigned int orig_line = pfile->line_table->highest_line;
377 while (*buffer->cur != '\n')
378 buffer->cur++;
380 _cpp_process_line_notes (pfile, true);
381 return orig_line != pfile->line_table->highest_line;
384 /* Skips whitespace, saving the next non-whitespace character. */
385 static void
386 skip_whitespace (cpp_reader *pfile, cppchar_t c)
388 cpp_buffer *buffer = pfile->buffer;
389 bool saw_NUL = false;
393 /* Horizontal space always OK. */
394 if (c == ' ' || c == '\t')
396 /* Just \f \v or \0 left. */
397 else if (c == '\0')
398 saw_NUL = true;
399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
400 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
405 c = *buffer->cur++;
407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
408 while (is_nvspace (c));
410 if (saw_NUL)
411 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
413 buffer->cur--;
416 /* See if the characters of a number token are valid in a name (no
417 '.', '+' or '-'). */
418 static int
419 name_p (cpp_reader *pfile, const cpp_string *string)
421 unsigned int i;
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
427 return 1;
430 /* After parsing an identifier or other sequence, produce a warning about
431 sequences not in NFC/NFKC. */
432 static void
433 warn_about_normalization (cpp_reader *pfile,
434 const cpp_token *token,
435 const struct normalize_state *s)
437 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438 && !pfile->state.skipping)
440 /* Make sure that the token is printed using UCNs, even
441 if we'd otherwise happily print UTF-8. */
442 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
443 size_t sz;
445 sz = cpp_spell_token (pfile, token, buf, false) - buf;
446 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
448 "`%.*s' is not in NFKC", (int) sz, buf);
449 else
450 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
451 "`%.*s' is not in NFC", (int) sz, buf);
455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
456 an identifier. FIRST is TRUE if this starts an identifier. */
457 static bool
458 forms_identifier_p (cpp_reader *pfile, int first,
459 struct normalize_state *state)
461 cpp_buffer *buffer = pfile->buffer;
463 if (*buffer->cur == '$')
465 if (!CPP_OPTION (pfile, dollars_in_ident))
466 return false;
468 buffer->cur++;
469 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
471 CPP_OPTION (pfile, warn_dollars) = 0;
472 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
475 return true;
478 /* Is this a syntactically valid UCN? */
479 if (CPP_OPTION (pfile, extended_identifiers)
480 && *buffer->cur == '\\'
481 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
483 buffer->cur += 2;
484 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485 state))
486 return true;
487 buffer->cur -= 2;
490 return false;
493 /* Lex an identifier starting at BUFFER->CUR - 1. */
494 static cpp_hashnode *
495 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496 struct normalize_state *nst)
498 cpp_hashnode *result;
499 const uchar *cur;
500 unsigned int len;
501 unsigned int hash = HT_HASHSTEP (0, *base);
503 cur = pfile->buffer->cur;
504 if (! starts_ucn)
505 while (ISIDNUM (*cur))
507 hash = HT_HASHSTEP (hash, *cur);
508 cur++;
510 pfile->buffer->cur = cur;
511 if (starts_ucn || forms_identifier_p (pfile, false, nst))
513 /* Slower version for identifiers containing UCNs (or $). */
514 do {
515 while (ISIDNUM (*pfile->buffer->cur))
517 pfile->buffer->cur++;
518 NORMALIZE_STATE_UPDATE_IDNUM (nst);
520 } while (forms_identifier_p (pfile, false, nst));
521 result = _cpp_interpret_identifier (pfile, base,
522 pfile->buffer->cur - base);
524 else
526 len = cur - base;
527 hash = HT_HASHFINISH (hash, len);
529 result = (cpp_hashnode *)
530 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
533 /* Rarely, identifiers require diagnostics when lexed. */
534 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535 && !pfile->state.skipping, 0))
537 /* It is allowed to poison the same identifier twice. */
538 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
539 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
540 NODE_NAME (result));
542 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543 replacement list of a variadic macro. */
544 if (result == pfile->spec_nodes.n__VA_ARGS__
545 && !pfile->state.va_args_ok)
546 cpp_error (pfile, CPP_DL_PEDWARN,
547 "__VA_ARGS__ can only appear in the expansion"
548 " of a C99 variadic macro");
551 return result;
554 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
555 static void
556 lex_number (cpp_reader *pfile, cpp_string *number,
557 struct normalize_state *nst)
559 const uchar *cur;
560 const uchar *base;
561 uchar *dest;
563 base = pfile->buffer->cur - 1;
566 cur = pfile->buffer->cur;
568 /* N.B. ISIDNUM does not include $. */
569 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
571 cur++;
572 NORMALIZE_STATE_UPDATE_IDNUM (nst);
575 pfile->buffer->cur = cur;
577 while (forms_identifier_p (pfile, false, nst));
579 number->len = cur - base;
580 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581 memcpy (dest, base, number->len);
582 dest[number->len] = '\0';
583 number->text = dest;
586 /* Create a token of type TYPE with a literal spelling. */
587 static void
588 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589 unsigned int len, enum cpp_ttype type)
591 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
593 memcpy (dest, base, len);
594 dest[len] = '\0';
595 token->type = type;
596 token->val.str.len = len;
597 token->val.str.text = dest;
600 /* Lexes a string, character constant, or angle-bracketed header file
601 name. The stored string contains the spelling, including opening
602 quote and leading any leading 'L'. It returns the type of the
603 literal, or CPP_OTHER if it was not properly terminated.
605 The spelling is NUL-terminated, but it is not guaranteed that this
606 is the first NUL since embedded NULs are preserved. */
607 static void
608 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
610 bool saw_NUL = false;
611 const uchar *cur;
612 cppchar_t terminator;
613 enum cpp_ttype type;
615 cur = base;
616 terminator = *cur++;
617 if (terminator == 'L')
618 terminator = *cur++;
619 if (terminator == '\"')
620 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621 else if (terminator == '\'')
622 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623 else
624 terminator = '>', type = CPP_HEADER_NAME;
626 for (;;)
628 cppchar_t c = *cur++;
630 /* In #include-style directives, terminators are not escapable. */
631 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632 cur++;
633 else if (c == terminator)
634 break;
635 else if (c == '\n')
637 cur--;
638 type = CPP_OTHER;
639 break;
641 else if (c == '\0')
642 saw_NUL = true;
645 if (saw_NUL && !pfile->state.skipping)
646 cpp_error (pfile, CPP_DL_WARNING,
647 "null character(s) preserved in literal");
649 pfile->buffer->cur = cur;
650 create_literal (pfile, token, base, cur - base, type);
653 /* The stored comment includes the comment start and any terminator. */
654 static void
655 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
656 cppchar_t type)
658 unsigned char *buffer;
659 unsigned int len, clen;
661 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
663 /* C++ comments probably (not definitely) have moved past a new
664 line, which we don't want to save in the comment. */
665 if (is_vspace (pfile->buffer->cur[-1]))
666 len--;
668 /* If we are currently in a directive, then we need to store all
669 C++ comments as C comments internally, and so we need to
670 allocate a little extra space in that case.
672 Note that the only time we encounter a directive here is
673 when we are saving comments in a "#define". */
674 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
676 buffer = _cpp_unaligned_alloc (pfile, clen);
678 token->type = CPP_COMMENT;
679 token->val.str.len = clen;
680 token->val.str.text = buffer;
682 buffer[0] = '/';
683 memcpy (buffer + 1, from, len - 1);
685 /* Finish conversion to a C comment, if necessary. */
686 if (pfile->state.in_directive && type == '/')
688 buffer[1] = '*';
689 buffer[clen - 2] = '*';
690 buffer[clen - 1] = '/';
694 /* Allocate COUNT tokens for RUN. */
695 void
696 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
698 run->base = XNEWVEC (cpp_token, count);
699 run->limit = run->base + count;
700 run->next = NULL;
703 /* Returns the next tokenrun, or creates one if there is none. */
704 static tokenrun *
705 next_tokenrun (tokenrun *run)
707 if (run->next == NULL)
709 run->next = XNEW (tokenrun);
710 run->next->prev = run;
711 _cpp_init_tokenrun (run->next, 250);
714 return run->next;
717 /* Allocate a single token that is invalidated at the same time as the
718 rest of the tokens on the line. Has its line and col set to the
719 same as the last lexed token, so that diagnostics appear in the
720 right place. */
721 cpp_token *
722 _cpp_temp_token (cpp_reader *pfile)
724 cpp_token *old, *result;
726 old = pfile->cur_token - 1;
727 if (pfile->cur_token == pfile->cur_run->limit)
729 pfile->cur_run = next_tokenrun (pfile->cur_run);
730 pfile->cur_token = pfile->cur_run->base;
733 result = pfile->cur_token++;
734 result->src_loc = old->src_loc;
735 return result;
738 /* Lex a token into RESULT (external interface). Takes care of issues
739 like directive handling, token lookahead, multiple include
740 optimization and skipping. */
741 const cpp_token *
742 _cpp_lex_token (cpp_reader *pfile)
744 cpp_token *result;
746 for (;;)
748 if (pfile->cur_token == pfile->cur_run->limit)
750 pfile->cur_run = next_tokenrun (pfile->cur_run);
751 pfile->cur_token = pfile->cur_run->base;
754 if (pfile->lookaheads)
756 pfile->lookaheads--;
757 result = pfile->cur_token++;
759 else
760 result = _cpp_lex_direct (pfile);
762 if (result->flags & BOL)
764 /* Is this a directive. If _cpp_handle_directive returns
765 false, it is an assembler #. */
766 if (result->type == CPP_HASH
767 /* 6.10.3 p 11: Directives in a list of macro arguments
768 gives undefined behavior. This implementation
769 handles the directive as normal. */
770 && pfile->state.parsing_args != 1)
772 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
774 if (pfile->directive_result.type == CPP_PADDING)
775 continue;
776 result = &pfile->directive_result;
779 else if (pfile->state.in_deferred_pragma)
780 result = &pfile->directive_result;
782 if (pfile->cb.line_change && !pfile->state.skipping)
783 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
786 /* We don't skip tokens in directives. */
787 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
788 break;
790 /* Outside a directive, invalidate controlling macros. At file
791 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
792 get here and MI optimization works. */
793 pfile->mi_valid = false;
795 if (!pfile->state.skipping || result->type == CPP_EOF)
796 break;
799 return result;
802 /* Returns true if a fresh line has been loaded. */
803 bool
804 _cpp_get_fresh_line (cpp_reader *pfile)
806 int return_at_eof;
808 /* We can't get a new line until we leave the current directive. */
809 if (pfile->state.in_directive)
810 return false;
812 for (;;)
814 cpp_buffer *buffer = pfile->buffer;
816 if (!buffer->need_line)
817 return true;
819 if (buffer->next_line < buffer->rlimit)
821 _cpp_clean_line (pfile);
822 return true;
825 /* First, get out of parsing arguments state. */
826 if (pfile->state.parsing_args)
827 return false;
829 /* End of buffer. Non-empty files should end in a newline. */
830 if (buffer->buf != buffer->rlimit
831 && buffer->next_line > buffer->rlimit
832 && !buffer->from_stage3)
834 /* Only warn once. */
835 buffer->next_line = buffer->rlimit;
836 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
837 CPP_BUF_COLUMN (buffer, buffer->cur),
838 "no newline at end of file");
841 return_at_eof = buffer->return_at_eof;
842 _cpp_pop_buffer (pfile);
843 if (pfile->buffer == NULL || return_at_eof)
844 return false;
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE.
   Expects local variables named `result' and `buffer' at the point of
   use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
857 /* Lex a token into pfile->cur_token, which is also incremented, to
858 get diagnostics pointing to the correct location.
860 Does not handle issues such as token lookahead, multiple-include
861 optimization, directives, skipping etc. This function is only
862 suitable for use by _cpp_lex_token, and in special cases like
863 lex_expansion_token which doesn't care for any of these issues.
865 When meeting a newline, returns CPP_EOF if parsing a directive,
866 otherwise returns to the start of the token buffer if permissible.
867 Returns the location of the lexed token. */
868 cpp_token *
869 _cpp_lex_direct (cpp_reader *pfile)
871 cppchar_t c;
872 cpp_buffer *buffer;
873 const unsigned char *comment_start;
874 cpp_token *result = pfile->cur_token++;
876 fresh_line:
877 result->flags = 0;
878 buffer = pfile->buffer;
879 if (buffer->need_line)
881 if (pfile->state.in_deferred_pragma)
883 result->type = CPP_PRAGMA_EOL;
884 pfile->state.in_deferred_pragma = false;
885 if (!pfile->state.pragma_allow_expansion)
886 pfile->state.prevent_expansion--;
887 return result;
889 if (!_cpp_get_fresh_line (pfile))
891 result->type = CPP_EOF;
892 if (!pfile->state.in_directive)
894 /* Tell the compiler the line number of the EOF token. */
895 result->src_loc = pfile->line_table->highest_line;
896 result->flags = BOL;
898 return result;
900 if (!pfile->keep_tokens)
902 pfile->cur_run = &pfile->base_run;
903 result = pfile->base_run.base;
904 pfile->cur_token = result + 1;
906 result->flags = BOL;
907 if (pfile->state.parsing_args == 2)
908 result->flags |= PREV_WHITE;
910 buffer = pfile->buffer;
911 update_tokens_line:
912 result->src_loc = pfile->line_table->highest_line;
914 skipped_white:
915 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
916 && !pfile->overlaid_buffer)
918 _cpp_process_line_notes (pfile, false);
919 result->src_loc = pfile->line_table->highest_line;
921 c = *buffer->cur++;
923 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
924 CPP_BUF_COLUMN (buffer, buffer->cur));
926 switch (c)
928 case ' ': case '\t': case '\f': case '\v': case '\0':
929 result->flags |= PREV_WHITE;
930 skip_whitespace (pfile, c);
931 goto skipped_white;
933 case '\n':
934 if (buffer->cur < buffer->rlimit)
935 CPP_INCREMENT_LINE (pfile, 0);
936 buffer->need_line = true;
937 goto fresh_line;
939 case '0': case '1': case '2': case '3': case '4':
940 case '5': case '6': case '7': case '8': case '9':
942 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
943 result->type = CPP_NUMBER;
944 lex_number (pfile, &result->val.str, &nst);
945 warn_about_normalization (pfile, result, &nst);
946 break;
949 case 'L':
950 /* 'L' may introduce wide characters or strings. */
951 if (*buffer->cur == '\'' || *buffer->cur == '"')
953 lex_string (pfile, result, buffer->cur - 1);
954 break;
956 /* Fall through. */
958 case '_':
959 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
960 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
961 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
962 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
963 case 'y': case 'z':
964 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
965 case 'G': case 'H': case 'I': case 'J': case 'K':
966 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
967 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
968 case 'Y': case 'Z':
969 result->type = CPP_NAME;
971 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
972 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
973 &nst);
974 warn_about_normalization (pfile, result, &nst);
977 /* Convert named operators to their proper types. */
978 if (result->val.node->flags & NODE_OPERATOR)
980 result->flags |= NAMED_OP;
981 result->type = (enum cpp_ttype) result->val.node->directive_index;
983 break;
985 case '\'':
986 case '"':
987 lex_string (pfile, result, buffer->cur - 1);
988 break;
990 case '/':
991 /* A potential block or line comment. */
992 comment_start = buffer->cur;
993 c = *buffer->cur;
995 if (c == '*')
997 if (_cpp_skip_block_comment (pfile))
998 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1000 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1001 || cpp_in_system_header (pfile)))
1003 /* Warn about comments only if pedantically GNUC89, and not
1004 in system headers. */
1005 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1006 && ! buffer->warned_cplusplus_comments)
1008 cpp_error (pfile, CPP_DL_PEDWARN,
1009 "C++ style comments are not allowed in ISO C90");
1010 cpp_error (pfile, CPP_DL_PEDWARN,
1011 "(this will be reported only once per input file)");
1012 buffer->warned_cplusplus_comments = 1;
1015 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1016 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1018 else if (c == '=')
1020 buffer->cur++;
1021 result->type = CPP_DIV_EQ;
1022 break;
1024 else
1026 result->type = CPP_DIV;
1027 break;
1030 if (!pfile->state.save_comments)
1032 result->flags |= PREV_WHITE;
1033 goto update_tokens_line;
1036 /* Save the comment as a token in its own right. */
1037 save_comment (pfile, result, comment_start, c);
1038 break;
1040 case '<':
1041 if (pfile->state.angled_headers)
1043 lex_string (pfile, result, buffer->cur - 1);
1044 break;
1047 result->type = CPP_LESS;
1048 if (*buffer->cur == '=')
1049 buffer->cur++, result->type = CPP_LESS_EQ;
1050 else if (*buffer->cur == '<')
1052 buffer->cur++;
1053 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1055 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1057 buffer->cur++;
1058 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1060 else if (CPP_OPTION (pfile, digraphs))
1062 if (*buffer->cur == ':')
1064 buffer->cur++;
1065 result->flags |= DIGRAPH;
1066 result->type = CPP_OPEN_SQUARE;
1068 else if (*buffer->cur == '%')
1070 buffer->cur++;
1071 result->flags |= DIGRAPH;
1072 result->type = CPP_OPEN_BRACE;
1075 break;
1077 case '>':
1078 result->type = CPP_GREATER;
1079 if (*buffer->cur == '=')
1080 buffer->cur++, result->type = CPP_GREATER_EQ;
1081 else if (*buffer->cur == '>')
1083 buffer->cur++;
1084 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1086 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1088 buffer->cur++;
1089 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1091 break;
1093 case '%':
1094 result->type = CPP_MOD;
1095 if (*buffer->cur == '=')
1096 buffer->cur++, result->type = CPP_MOD_EQ;
1097 else if (CPP_OPTION (pfile, digraphs))
1099 if (*buffer->cur == ':')
1101 buffer->cur++;
1102 result->flags |= DIGRAPH;
1103 result->type = CPP_HASH;
1104 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1105 buffer->cur += 2, result->type = CPP_PASTE;
1107 else if (*buffer->cur == '>')
1109 buffer->cur++;
1110 result->flags |= DIGRAPH;
1111 result->type = CPP_CLOSE_BRACE;
1114 break;
1116 case '.':
1117 result->type = CPP_DOT;
1118 if (ISDIGIT (*buffer->cur))
1120 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1121 result->type = CPP_NUMBER;
1122 lex_number (pfile, &result->val.str, &nst);
1123 warn_about_normalization (pfile, result, &nst);
1125 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1126 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1127 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1128 buffer->cur++, result->type = CPP_DOT_STAR;
1129 break;
1131 case '+':
1132 result->type = CPP_PLUS;
1133 if (*buffer->cur == '+')
1134 buffer->cur++, result->type = CPP_PLUS_PLUS;
1135 else if (*buffer->cur == '=')
1136 buffer->cur++, result->type = CPP_PLUS_EQ;
1137 break;
1139 case '-':
1140 result->type = CPP_MINUS;
1141 if (*buffer->cur == '>')
1143 buffer->cur++;
1144 result->type = CPP_DEREF;
1145 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1146 buffer->cur++, result->type = CPP_DEREF_STAR;
1148 else if (*buffer->cur == '-')
1149 buffer->cur++, result->type = CPP_MINUS_MINUS;
1150 else if (*buffer->cur == '=')
1151 buffer->cur++, result->type = CPP_MINUS_EQ;
1152 break;
1154 case '&':
1155 result->type = CPP_AND;
1156 if (*buffer->cur == '&')
1157 buffer->cur++, result->type = CPP_AND_AND;
1158 else if (*buffer->cur == '=')
1159 buffer->cur++, result->type = CPP_AND_EQ;
1160 break;
1162 case '|':
1163 result->type = CPP_OR;
1164 if (*buffer->cur == '|')
1165 buffer->cur++, result->type = CPP_OR_OR;
1166 else if (*buffer->cur == '=')
1167 buffer->cur++, result->type = CPP_OR_EQ;
1168 break;
1170 case ':':
1171 result->type = CPP_COLON;
1172 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1173 buffer->cur++, result->type = CPP_SCOPE;
1174 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1176 buffer->cur++;
1177 result->flags |= DIGRAPH;
1178 result->type = CPP_CLOSE_SQUARE;
1180 break;
1182 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1183 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1184 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1185 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1186 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1188 case '?': result->type = CPP_QUERY; break;
1189 case '~': result->type = CPP_COMPL; break;
1190 case ',': result->type = CPP_COMMA; break;
1191 case '(': result->type = CPP_OPEN_PAREN; break;
1192 case ')': result->type = CPP_CLOSE_PAREN; break;
1193 case '[': result->type = CPP_OPEN_SQUARE; break;
1194 case ']': result->type = CPP_CLOSE_SQUARE; break;
1195 case '{': result->type = CPP_OPEN_BRACE; break;
1196 case '}': result->type = CPP_CLOSE_BRACE; break;
1197 case ';': result->type = CPP_SEMICOLON; break;
1199 /* @ is a punctuator in Objective-C. */
1200 case '@': result->type = CPP_ATSIGN; break;
1202 case '$':
1203 case '\\':
1205 const uchar *base = --buffer->cur;
1206 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1208 if (forms_identifier_p (pfile, true, &nst))
1210 result->type = CPP_NAME;
1211 result->val.node = lex_identifier (pfile, base, true, &nst);
1212 warn_about_normalization (pfile, result, &nst);
1213 break;
1215 buffer->cur++;
1218 default:
1219 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1220 break;
1223 return result;
1226 /* An upper bound on the number of bytes needed to spell TOKEN.
1227 Does not include preceding whitespace. */
1228 unsigned int
1229 cpp_token_len (const cpp_token *token)
1231 unsigned int len;
1233 switch (TOKEN_SPELL (token))
1235 default: len = 4; break;
1236 case SPELL_LITERAL: len = token->val.str.len; break;
1237 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1240 return len;
/* Decode the UTF-8 sequence that starts at NAME and store the
   equivalent escape, a backslash, 'U' and eight lower-case hex
   digits, in BUFFER.  Exactly ten bytes are written and no NUL is
   appended.  Returns the number of bytes of NAME that made up the
   sequence.  Aborts if a continuation byte is not of the form
   10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *p = name;
  unsigned long code_point;
  unsigned int lead;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading one bits in the first byte gives the length
     of the whole sequence.  */
  lead = *p;
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload bits of the first byte are whatever is left below
     the length marker.  */
  code_point = *p & (0x7F >> seq_len);

  /* Each continuation byte contributes its low six bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      p++;
      code_point = (code_point << 6) | (*p & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*p & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1278 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1279 already contain the enough space to hold the token's spelling.
1280 Returns a pointer to the character after the last character written.
1281 FORSTRING is true if this is to be the spelling after translation
1282 phase 1 (this is different for UCNs).
1283 FIXME: Would be nice if we didn't need the PFILE argument. */
1284 unsigned char *
1285 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1286 unsigned char *buffer, bool forstring)
1288 switch (TOKEN_SPELL (token))
1290 case SPELL_OPERATOR:
1292 const unsigned char *spelling;
1293 unsigned char c;
1295 if (token->flags & DIGRAPH)
1296 spelling
1297 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1298 else if (token->flags & NAMED_OP)
1299 goto spell_ident;
1300 else
1301 spelling = TOKEN_NAME (token);
1303 while ((c = *spelling++) != '\0')
1304 *buffer++ = c;
1306 break;
1308 spell_ident:
1309 case SPELL_IDENT:
1310 if (forstring)
1312 memcpy (buffer, NODE_NAME (token->val.node),
1313 NODE_LEN (token->val.node));
1314 buffer += NODE_LEN (token->val.node);
1316 else
1318 size_t i;
1319 const unsigned char * name = NODE_NAME (token->val.node);
1321 for (i = 0; i < NODE_LEN (token->val.node); i++)
1322 if (name[i] & ~0x7F)
1324 i += utf8_to_ucn (buffer, name + i) - 1;
1325 buffer += 10;
1327 else
1328 *buffer++ = NODE_NAME (token->val.node)[i];
1330 break;
1332 case SPELL_LITERAL:
1333 memcpy (buffer, token->val.str.text, token->val.str.len);
1334 buffer += token->val.str.len;
1335 break;
1337 case SPELL_NONE:
1338 cpp_error (pfile, CPP_DL_ICE,
1339 "unspellable token %s", TOKEN_NAME (token));
1340 break;
1343 return buffer;
1346 /* Returns TOKEN spelt as a null-terminated string. The string is
1347 freed when the reader is destroyed. Useful for diagnostics. */
1348 unsigned char *
1349 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1351 unsigned int len = cpp_token_len (token) + 1;
1352 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1354 end = cpp_spell_token (pfile, token, start, false);
1355 end[0] = '\0';
1357 return start;
1360 /* Used by C front ends, which really should move to using
1361 cpp_token_as_text. */
1362 const char *
1363 cpp_type2name (enum cpp_ttype type)
1365 return (const char *) token_spellings[type].name;
1368 /* Writes the spelling of token to FP, without any preceding space.
1369 Separated from cpp_spell_token for efficiency - to avoid stdio
1370 double-buffering. */
1371 void
1372 cpp_output_token (const cpp_token *token, FILE *fp)
1374 switch (TOKEN_SPELL (token))
1376 case SPELL_OPERATOR:
1378 const unsigned char *spelling;
1379 int c;
1381 if (token->flags & DIGRAPH)
1382 spelling
1383 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1384 else if (token->flags & NAMED_OP)
1385 goto spell_ident;
1386 else
1387 spelling = TOKEN_NAME (token);
1389 c = *spelling;
1391 putc (c, fp);
1392 while ((c = *++spelling) != '\0');
1394 break;
1396 spell_ident:
1397 case SPELL_IDENT:
1399 size_t i;
1400 const unsigned char * name = NODE_NAME (token->val.node);
1402 for (i = 0; i < NODE_LEN (token->val.node); i++)
1403 if (name[i] & ~0x7F)
1405 unsigned char buffer[10];
1406 i += utf8_to_ucn (buffer, name + i) - 1;
1407 fwrite (buffer, 1, 10, fp);
1409 else
1410 fputc (NODE_NAME (token->val.node)[i], fp);
1412 break;
1414 case SPELL_LITERAL:
1415 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1416 break;
1418 case SPELL_NONE:
1419 /* An error, most probably. */
1420 break;
1424 /* Compare two tokens. */
1426 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1428 if (a->type == b->type && a->flags == b->flags)
1429 switch (TOKEN_SPELL (a))
1431 default: /* Keep compiler happy. */
1432 case SPELL_OPERATOR:
1433 return 1;
1434 case SPELL_NONE:
1435 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1436 case SPELL_IDENT:
1437 return a->val.node == b->val.node;
1438 case SPELL_LITERAL:
1439 return (a->val.str.len == b->val.str.len
1440 && !memcmp (a->val.str.text, b->val.str.text,
1441 a->val.str.len));
1444 return 0;
1447 /* Returns nonzero if a space should be inserted to avoid an
1448 accidental token paste for output. For simplicity, it is
1449 conservative, and occasionally advises a space where one is not
1450 needed, e.g. "." and ".2". */
1452 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1453 const cpp_token *token2)
1455 enum cpp_ttype a = token1->type, b = token2->type;
1456 cppchar_t c;
1458 if (token1->flags & NAMED_OP)
1459 a = CPP_NAME;
1460 if (token2->flags & NAMED_OP)
1461 b = CPP_NAME;
1463 c = EOF;
1464 if (token2->flags & DIGRAPH)
1465 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1466 else if (token_spellings[b].category == SPELL_OPERATOR)
1467 c = token_spellings[b].name[0];
1469 /* Quickly get everything that can paste with an '='. */
1470 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1471 return 1;
1473 switch (a)
1475 case CPP_GREATER: return c == '>' || c == '?';
1476 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1477 case CPP_PLUS: return c == '+';
1478 case CPP_MINUS: return c == '-' || c == '>';
1479 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1480 case CPP_MOD: return c == ':' || c == '>';
1481 case CPP_AND: return c == '&';
1482 case CPP_OR: return c == '|';
1483 case CPP_COLON: return c == ':' || c == '>';
1484 case CPP_DEREF: return c == '*';
1485 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1486 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1487 case CPP_NAME: return ((b == CPP_NUMBER
1488 && name_p (pfile, &token2->val.str))
1489 || b == CPP_NAME
1490 || b == CPP_CHAR || b == CPP_STRING); /* L */
1491 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1492 || c == '.' || c == '+' || c == '-');
1493 /* UCNs */
1494 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1495 && b == CPP_NAME)
1496 || (CPP_OPTION (pfile, objc)
1497 && token1->val.str.text[0] == '@'
1498 && (b == CPP_NAME || b == CPP_STRING)));
1499 default: break;
1502 return 0;
1505 /* Output all the remaining tokens on the current line, and a newline
1506 character, to FP. Leading whitespace is removed. If there are
1507 macros, special token padding is not performed. */
1508 void
1509 cpp_output_line (cpp_reader *pfile, FILE *fp)
1511 const cpp_token *token;
1513 token = cpp_get_token (pfile);
1514 while (token->type != CPP_EOF)
1516 cpp_output_token (token, fp);
1517 token = cpp_get_token (pfile);
1518 if (token->flags & PREV_WHITE)
1519 putc (' ', fp);
1522 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the distance from BUFF's
   cur to its limit.  Both macro arguments are parenthesized so that
   any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1540 /* Create a new allocation buffer. Place the control block at the end
1541 of the buffer, so that buffer overflows will cause immediate chaos. */
1542 static _cpp_buff *
1543 new_buff (size_t len)
1545 _cpp_buff *result;
1546 unsigned char *base;
1548 if (len < MIN_BUFF_SIZE)
1549 len = MIN_BUFF_SIZE;
1550 len = CPP_ALIGN (len);
1552 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1553 result = (_cpp_buff *) (base + len);
1554 result->base = base;
1555 result->cur = base;
1556 result->limit = base + len;
1557 result->next = NULL;
1558 return result;
1561 /* Place a chain of unwanted allocation buffers on the free list. */
1562 void
1563 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1565 _cpp_buff *end = buff;
1567 while (end->next)
1568 end = end->next;
1569 end->next = pfile->free_buffs;
1570 pfile->free_buffs = buff;
1573 /* Return a free buffer of size at least MIN_SIZE. */
1574 _cpp_buff *
1575 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1577 _cpp_buff *result, **p;
1579 for (p = &pfile->free_buffs;; p = &(*p)->next)
1581 size_t size;
1583 if (*p == NULL)
1584 return new_buff (min_size);
1585 result = *p;
1586 size = result->limit - result->base;
1587 /* Return a buffer that's big enough, but don't waste one that's
1588 way too big. */
1589 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1590 break;
1593 *p = result->next;
1594 result->next = NULL;
1595 result->cur = result->base;
1596 return result;
1599 /* Creates a new buffer with enough space to hold the uncommitted
1600 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1601 the excess bytes to the new buffer. Chains the new buffer after
1602 BUFF, and returns the new buffer. */
1603 _cpp_buff *
1604 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1606 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1607 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1609 buff->next = new_buff;
1610 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1611 return new_buff;
1614 /* Creates a new buffer with enough space to hold the uncommitted
1615 remaining bytes of the buffer pointed to by BUFF, and at least
1616 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1617 Chains the new buffer before the buffer pointed to by BUFF, and
1618 updates the pointer to point to the new buffer. */
1619 void
1620 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1622 _cpp_buff *new_buff, *old_buff = *pbuff;
1623 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1625 new_buff = _cpp_get_buff (pfile, size);
1626 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1627 new_buff->next = old_buff;
1628 *pbuff = new_buff;
1631 /* Free a chain of buffers starting at BUFF. */
1632 void
1633 _cpp_free_buff (_cpp_buff *buff)
1635 _cpp_buff *next;
1637 for (; buff; buff = next)
1639 next = buff->next;
1640 free (buff->base);
1644 /* Allocate permanent, unaligned storage of length LEN. */
1645 unsigned char *
1646 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1648 _cpp_buff *buff = pfile->u_buff;
1649 unsigned char *result = buff->cur;
1651 if (len > (size_t) (buff->limit - result))
1653 buff = _cpp_get_buff (pfile, len);
1654 buff->next = pfile->u_buff;
1655 pfile->u_buff = buff;
1656 result = buff->cur;
1659 buff->cur = result + len;
1660 return result;
1663 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1664 That buffer is used for growing allocations when saving macro
1665 replacement lists in a #define, and when parsing an answer to an
1666 assertion in #assert, #unassert or #if (and therefore possibly
1667 whilst expanding macros). It therefore must not be used by any
1668 code that they might call: specifically the lexer and the guts of
1669 the macro expander.
1671 All existing other uses clearly fit this restriction: storing
1672 registered pragmas during initialization. */
1673 unsigned char *
1674 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1676 _cpp_buff *buff = pfile->a_buff;
1677 unsigned char *result = buff->cur;
1679 if (len > (size_t) (buff->limit - result))
1681 buff = _cpp_get_buff (pfile, len);
1682 buff->next = pfile->a_buff;
1683 pfile->a_buff = buff;
1684 result = buff->cur;
1687 buff->cur = result + len;
1688 return result;
1691 /* Say which field of TOK is in use. */
1693 enum cpp_token_fld_kind
1694 cpp_token_val_index (cpp_token *tok)
1696 switch (TOKEN_SPELL (tok))
1698 case SPELL_IDENT:
1699 return CPP_TOKEN_FLD_NODE;
1700 case SPELL_LITERAL:
1701 return CPP_TOKEN_FLD_STR;
1702 case SPELL_NONE:
1703 if (tok->type == CPP_MACRO_ARG)
1704 return CPP_TOKEN_FLD_ARG_NO;
1705 else if (tok->type == CPP_PADDING)
1706 return CPP_TOKEN_FLD_SOURCE;
1707 else if (tok->type == CPP_PRAGMA)
1708 return CPP_TOKEN_FLD_PRAGMA;
1709 /* else fall through */
1710 default:
1711 return CPP_TOKEN_FLD_NONE;