2008-04-25 Kai Tietz <kai.tietz@onvision.com>
[official-gcc.git] / libcpp / lex.c
blob772a8701654b8af476488df89e1d3e1b69c9f26d
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* How a token's spelling is obtained.  Used as the CATEGORY field of
   struct token_spelling and returned by TOKEN_SPELL.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
64 static _cpp_buff *new_buff (size_t);
67 /* Utility routine:
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
72 cpp_ideq (const cpp_token *token, const char *string)
74 if (token->type != CPP_NAME)
75 return 0;
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 if (buffer->notes_used == buffer->notes_cap)
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99 void
100 _cpp_clean_line (cpp_reader *pfile)
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
112 if (!buffer->from_stage3)
114 const uchar *pbackslash = NULL;
116 /* Short circuit for the common case of an un-escaped line with
117 no trigraphs. The primary win here is by not writing any
118 data back to memory until we have to. */
119 for (;;)
121 c = *++s;
122 if (__builtin_expect (c == '\n', false)
123 || __builtin_expect (c == '\r', false))
125 d = (uchar *) s;
127 if (__builtin_expect (s == buffer->rlimit, false))
128 goto done;
130 /* DOS line ending? */
131 if (__builtin_expect (c == '\r', false)
132 && s[1] == '\n')
134 s++;
135 if (s == buffer->rlimit)
136 goto done;
139 if (__builtin_expect (pbackslash == NULL, true))
140 goto done;
142 /* Check for escaped newline. */
143 p = d;
144 while (is_nvspace (p[-1]))
145 p--;
146 if (p - 1 != pbackslash)
147 goto done;
149 /* Have an escaped newline; process it and proceed to
150 the slow path. */
151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152 d = p - 2;
153 buffer->next_line = p - 1;
154 break;
156 if (__builtin_expect (c == '\\', false))
157 pbackslash = s;
158 else if (__builtin_expect (c == '?', false)
159 && __builtin_expect (s[1] == '?', false)
160 && _cpp_trigraph_map[s[2]])
162 /* Have a trigraph. We may or may not have to convert
163 it. Add a line note regardless, for -Wtrigraphs. */
164 add_line_note (buffer, s, s[2]);
165 if (CPP_OPTION (pfile, trigraphs))
167 /* We do, and that means we have to switch to the
168 slow path. */
169 d = (uchar *) s;
170 *d = _cpp_trigraph_map[s[2]];
171 s += 2;
172 break;
178 for (;;)
180 c = *++s;
181 *++d = c;
183 if (c == '\n' || c == '\r')
185 /* Handle DOS line endings. */
186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 s++;
188 if (s == buffer->rlimit)
189 break;
191 /* Escaped? */
192 p = d;
193 while (p != buffer->next_line && is_nvspace (p[-1]))
194 p--;
195 if (p == buffer->next_line || p[-1] != '\\')
196 break;
198 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199 d = p - 2;
200 buffer->next_line = p - 1;
202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
204 /* Add a note regardless, for the benefit of -Wtrigraphs. */
205 add_line_note (buffer, d, s[2]);
206 if (CPP_OPTION (pfile, trigraphs))
208 *d = _cpp_trigraph_map[s[2]];
209 s += 2;
214 else
217 s++;
218 while (*s != '\n' && *s != '\r');
219 d = (uchar *) s;
221 /* Handle DOS line endings. */
222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 s++;
226 done:
227 *d = '\n';
228 /* A sentinel note that should never be processed. */
229 add_line_note (buffer, d + 1, '\n');
230 buffer->next_line = s + 1;
233 /* Return true if the trigraph indicated by NOTE should be warned
234 about in a comment. */
235 static bool
236 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
238 const uchar *p;
240 /* Within comments we don't warn about trigraphs, unless the
241 trigraph forms an escaped newline, as that may change
242 behavior. */
243 if (note->type != '/')
244 return false;
246 /* If -trigraphs, then this was an escaped newline iff the next note
247 is coincident. */
248 if (CPP_OPTION (pfile, trigraphs))
249 return note[1].pos == note->pos;
251 /* Otherwise, see if this forms an escaped newline. */
252 p = note->pos + 3;
253 while (is_nvspace (*p))
254 p++;
256 /* There might have been escaped newlines between the trigraph and the
257 newline we found. Hence the position test. */
258 return (*p == '\n' && p < note[1].pos);
261 /* Process the notes created by add_line_note as far as the current
262 location. */
263 void
264 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
266 cpp_buffer *buffer = pfile->buffer;
268 for (;;)
270 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271 unsigned int col;
273 if (note->pos > buffer->cur)
274 break;
276 buffer->cur_note++;
277 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
279 if (note->type == '\\' || note->type == ' ')
281 if (note->type == ' ' && !in_comment)
282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283 "backslash and newline separated by space");
285 if (buffer->next_line > buffer->rlimit)
287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288 "backslash-newline at end of file");
289 /* Prevent "no newline at end of file" warning. */
290 buffer->next_line = buffer->rlimit;
293 buffer->line_base = note->pos;
294 CPP_INCREMENT_LINE (pfile, 0);
296 else if (_cpp_trigraph_map[note->type])
298 if (CPP_OPTION (pfile, warn_trigraphs)
299 && (!in_comment || warn_in_comment (pfile, note)))
301 if (CPP_OPTION (pfile, trigraphs))
302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303 "trigraph ??%c converted to %c",
304 note->type,
305 (int) _cpp_trigraph_map[note->type]);
306 else
308 cpp_error_with_line
309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310 "trigraph ??%c ignored, use -trigraphs to enable",
311 note->type);
315 else
316 abort ();
320 /* Skip a C-style block comment. We find the end of the comment by
321 seeing if an asterisk is before every '/' we encounter. Returns
322 nonzero if comment terminated by EOF, zero otherwise.
324 Buffer->cur points to the initial asterisk of the comment. */
325 bool
326 _cpp_skip_block_comment (cpp_reader *pfile)
328 cpp_buffer *buffer = pfile->buffer;
329 const uchar *cur = buffer->cur;
330 uchar c;
332 cur++;
333 if (*cur == '/')
334 cur++;
336 for (;;)
338 /* People like decorating comments with '*', so check for '/'
339 instead for efficiency. */
340 c = *cur++;
342 if (c == '/')
344 if (cur[-2] == '*')
345 break;
347 /* Warn about potential nested comments, but not if the '/'
348 comes immediately before the true comment delimiter.
349 Don't bother to get it right across escaped newlines. */
350 if (CPP_OPTION (pfile, warn_comments)
351 && cur[0] == '*' && cur[1] != '/')
353 buffer->cur = cur;
354 cpp_error_with_line (pfile, CPP_DL_WARNING,
355 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356 "\"/*\" within comment");
359 else if (c == '\n')
361 unsigned int cols;
362 buffer->cur = cur - 1;
363 _cpp_process_line_notes (pfile, true);
364 if (buffer->next_line >= buffer->rlimit)
365 return true;
366 _cpp_clean_line (pfile);
368 cols = buffer->next_line - buffer->line_base;
369 CPP_INCREMENT_LINE (pfile, cols);
371 cur = buffer->cur;
375 buffer->cur = cur;
376 _cpp_process_line_notes (pfile, true);
377 return false;
380 /* Skip a C++ line comment, leaving buffer->cur pointing to the
381 terminating newline. Handles escaped newlines. Returns nonzero
382 if a multiline comment. */
383 static int
384 skip_line_comment (cpp_reader *pfile)
386 cpp_buffer *buffer = pfile->buffer;
387 unsigned int orig_line = pfile->line_table->highest_line;
389 while (*buffer->cur != '\n')
390 buffer->cur++;
392 _cpp_process_line_notes (pfile, true);
393 return orig_line != pfile->line_table->highest_line;
396 /* Skips whitespace, saving the next non-whitespace character. */
397 static void
398 skip_whitespace (cpp_reader *pfile, cppchar_t c)
400 cpp_buffer *buffer = pfile->buffer;
401 bool saw_NUL = false;
405 /* Horizontal space always OK. */
406 if (c == ' ' || c == '\t')
408 /* Just \f \v or \0 left. */
409 else if (c == '\0')
410 saw_NUL = true;
411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
417 c = *buffer->cur++;
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c));
422 if (saw_NUL)
423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
425 buffer->cur--;
428 /* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430 static int
431 name_p (cpp_reader *pfile, const cpp_string *string)
433 unsigned int i;
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
439 return 1;
442 /* After parsing an identifier or other sequence, produce a warning about
443 sequences not in NFC/NFKC. */
444 static void
445 warn_about_normalization (cpp_reader *pfile,
446 const cpp_token *token,
447 const struct normalize_state *s)
449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450 && !pfile->state.skipping)
452 /* Make sure that the token is printed using UCNs, even
453 if we'd otherwise happily print UTF-8. */
454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455 size_t sz;
457 sz = cpp_spell_token (pfile, token, buf, false) - buf;
458 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460 "`%.*s' is not in NFKC", (int) sz, buf);
461 else
462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463 "`%.*s' is not in NFC", (int) sz, buf);
467 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
468 an identifier. FIRST is TRUE if this starts an identifier. */
469 static bool
470 forms_identifier_p (cpp_reader *pfile, int first,
471 struct normalize_state *state)
473 cpp_buffer *buffer = pfile->buffer;
475 if (*buffer->cur == '$')
477 if (!CPP_OPTION (pfile, dollars_in_ident))
478 return false;
480 buffer->cur++;
481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
483 CPP_OPTION (pfile, warn_dollars) = 0;
484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
487 return true;
490 /* Is this a syntactically valid UCN? */
491 if (CPP_OPTION (pfile, extended_identifiers)
492 && *buffer->cur == '\\'
493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
495 buffer->cur += 2;
496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 state))
498 return true;
499 buffer->cur -= 2;
502 return false;
505 /* Lex an identifier starting at BUFFER->CUR - 1. */
506 static cpp_hashnode *
507 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 struct normalize_state *nst)
510 cpp_hashnode *result;
511 const uchar *cur;
512 unsigned int len;
513 unsigned int hash = HT_HASHSTEP (0, *base);
515 cur = pfile->buffer->cur;
516 if (! starts_ucn)
517 while (ISIDNUM (*cur))
519 hash = HT_HASHSTEP (hash, *cur);
520 cur++;
522 pfile->buffer->cur = cur;
523 if (starts_ucn || forms_identifier_p (pfile, false, nst))
525 /* Slower version for identifiers containing UCNs (or $). */
526 do {
527 while (ISIDNUM (*pfile->buffer->cur))
529 pfile->buffer->cur++;
530 NORMALIZE_STATE_UPDATE_IDNUM (nst);
532 } while (forms_identifier_p (pfile, false, nst));
533 result = _cpp_interpret_identifier (pfile, base,
534 pfile->buffer->cur - base);
536 else
538 len = cur - base;
539 hash = HT_HASHFINISH (hash, len);
541 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
542 base, len, hash, HT_ALLOC));
545 /* Rarely, identifiers require diagnostics when lexed. */
546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 && !pfile->state.skipping, 0))
549 /* It is allowed to poison the same identifier twice. */
550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552 NODE_NAME (result));
554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 replacement list of a variadic macro. */
556 if (result == pfile->spec_nodes.n__VA_ARGS__
557 && !pfile->state.va_args_ok)
558 cpp_error (pfile, CPP_DL_PEDWARN,
559 "__VA_ARGS__ can only appear in the expansion"
560 " of a C99 variadic macro");
563 return result;
566 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
567 static void
568 lex_number (cpp_reader *pfile, cpp_string *number,
569 struct normalize_state *nst)
571 const uchar *cur;
572 const uchar *base;
573 uchar *dest;
575 base = pfile->buffer->cur - 1;
578 cur = pfile->buffer->cur;
580 /* N.B. ISIDNUM does not include $. */
581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
583 cur++;
584 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 pfile->buffer->cur = cur;
589 while (forms_identifier_p (pfile, false, nst));
591 number->len = cur - base;
592 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593 memcpy (dest, base, number->len);
594 dest[number->len] = '\0';
595 number->text = dest;
598 /* Create a token of type TYPE with a literal spelling. */
599 static void
600 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 unsigned int len, enum cpp_ttype type)
603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
605 memcpy (dest, base, len);
606 dest[len] = '\0';
607 token->type = type;
608 token->val.str.len = len;
609 token->val.str.text = dest;
612 /* Lexes a string, character constant, or angle-bracketed header file
613 name. The stored string contains the spelling, including opening
614 quote and leading any leading 'L', 'u' or 'U'. It returns the type
615 of the literal, or CPP_OTHER if it was not properly terminated.
617 The spelling is NUL-terminated, but it is not guaranteed that this
618 is the first NUL since embedded NULs are preserved. */
619 static void
620 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
622 bool saw_NUL = false;
623 const uchar *cur;
624 cppchar_t terminator;
625 enum cpp_ttype type;
627 cur = base;
628 terminator = *cur++;
629 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
630 terminator = *cur++;
631 if (terminator == '\"')
632 type = (*base == 'L' ? CPP_WSTRING :
633 *base == 'U' ? CPP_STRING32 :
634 *base == 'u' ? CPP_STRING16 : CPP_STRING);
635 else if (terminator == '\'')
636 type = (*base == 'L' ? CPP_WCHAR :
637 *base == 'U' ? CPP_CHAR32 :
638 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
639 else
640 terminator = '>', type = CPP_HEADER_NAME;
642 for (;;)
644 cppchar_t c = *cur++;
646 /* In #include-style directives, terminators are not escapable. */
647 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
648 cur++;
649 else if (c == terminator)
650 break;
651 else if (c == '\n')
653 cur--;
654 type = CPP_OTHER;
655 break;
657 else if (c == '\0')
658 saw_NUL = true;
661 if (saw_NUL && !pfile->state.skipping)
662 cpp_error (pfile, CPP_DL_WARNING,
663 "null character(s) preserved in literal");
665 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
666 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
667 (int) terminator);
669 pfile->buffer->cur = cur;
670 create_literal (pfile, token, base, cur - base, type);
673 /* The stored comment includes the comment start and any terminator. */
674 static void
675 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
676 cppchar_t type)
678 unsigned char *buffer;
679 unsigned int len, clen;
681 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
683 /* C++ comments probably (not definitely) have moved past a new
684 line, which we don't want to save in the comment. */
685 if (is_vspace (pfile->buffer->cur[-1]))
686 len--;
688 /* If we are currently in a directive, then we need to store all
689 C++ comments as C comments internally, and so we need to
690 allocate a little extra space in that case.
692 Note that the only time we encounter a directive here is
693 when we are saving comments in a "#define". */
694 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696 buffer = _cpp_unaligned_alloc (pfile, clen);
698 token->type = CPP_COMMENT;
699 token->val.str.len = clen;
700 token->val.str.text = buffer;
702 buffer[0] = '/';
703 memcpy (buffer + 1, from, len - 1);
705 /* Finish conversion to a C comment, if necessary. */
706 if (pfile->state.in_directive && type == '/')
708 buffer[1] = '*';
709 buffer[clen - 2] = '*';
710 buffer[clen - 1] = '/';
714 /* Allocate COUNT tokens for RUN. */
715 void
716 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
718 run->base = XNEWVEC (cpp_token, count);
719 run->limit = run->base + count;
720 run->next = NULL;
723 /* Returns the next tokenrun, or creates one if there is none. */
724 static tokenrun *
725 next_tokenrun (tokenrun *run)
727 if (run->next == NULL)
729 run->next = XNEW (tokenrun);
730 run->next->prev = run;
731 _cpp_init_tokenrun (run->next, 250);
734 return run->next;
737 /* Allocate a single token that is invalidated at the same time as the
738 rest of the tokens on the line. Has its line and col set to the
739 same as the last lexed token, so that diagnostics appear in the
740 right place. */
741 cpp_token *
742 _cpp_temp_token (cpp_reader *pfile)
744 cpp_token *old, *result;
746 old = pfile->cur_token - 1;
747 if (pfile->cur_token == pfile->cur_run->limit)
749 pfile->cur_run = next_tokenrun (pfile->cur_run);
750 pfile->cur_token = pfile->cur_run->base;
753 result = pfile->cur_token++;
754 result->src_loc = old->src_loc;
755 return result;
758 /* Lex a token into RESULT (external interface). Takes care of issues
759 like directive handling, token lookahead, multiple include
760 optimization and skipping. */
761 const cpp_token *
762 _cpp_lex_token (cpp_reader *pfile)
764 cpp_token *result;
766 for (;;)
768 if (pfile->cur_token == pfile->cur_run->limit)
770 pfile->cur_run = next_tokenrun (pfile->cur_run);
771 pfile->cur_token = pfile->cur_run->base;
773 /* We assume that the current token is somewhere in the current
774 run. */
775 if (pfile->cur_token < pfile->cur_run->base
776 || pfile->cur_token >= pfile->cur_run->limit)
777 abort ();
779 if (pfile->lookaheads)
781 pfile->lookaheads--;
782 result = pfile->cur_token++;
784 else
785 result = _cpp_lex_direct (pfile);
787 if (result->flags & BOL)
789 /* Is this a directive. If _cpp_handle_directive returns
790 false, it is an assembler #. */
791 if (result->type == CPP_HASH
792 /* 6.10.3 p 11: Directives in a list of macro arguments
793 gives undefined behavior. This implementation
794 handles the directive as normal. */
795 && pfile->state.parsing_args != 1)
797 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
799 if (pfile->directive_result.type == CPP_PADDING)
800 continue;
801 result = &pfile->directive_result;
804 else if (pfile->state.in_deferred_pragma)
805 result = &pfile->directive_result;
807 if (pfile->cb.line_change && !pfile->state.skipping)
808 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
811 /* We don't skip tokens in directives. */
812 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
813 break;
815 /* Outside a directive, invalidate controlling macros. At file
816 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
817 get here and MI optimization works. */
818 pfile->mi_valid = false;
820 if (!pfile->state.skipping || result->type == CPP_EOF)
821 break;
824 return result;
827 /* Returns true if a fresh line has been loaded. */
828 bool
829 _cpp_get_fresh_line (cpp_reader *pfile)
831 int return_at_eof;
833 /* We can't get a new line until we leave the current directive. */
834 if (pfile->state.in_directive)
835 return false;
837 for (;;)
839 cpp_buffer *buffer = pfile->buffer;
841 if (!buffer->need_line)
842 return true;
844 if (buffer->next_line < buffer->rlimit)
846 _cpp_clean_line (pfile);
847 return true;
850 /* First, get out of parsing arguments state. */
851 if (pfile->state.parsing_args)
852 return false;
854 /* End of buffer. Non-empty files should end in a newline. */
855 if (buffer->buf != buffer->rlimit
856 && buffer->next_line > buffer->rlimit
857 && !buffer->from_stage3)
859 /* Clip to buffer size. */
860 buffer->next_line = buffer->rlimit;
863 return_at_eof = buffer->return_at_eof;
864 _cpp_pop_buffer (pfile);
865 if (pfile->buffer == NULL || return_at_eof)
866 return false;
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE.
   Expects variables named `result' and `buffer' to be in scope at
   the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = ELSE_TYPE;                 \
      if (*buffer->cur == CHAR)                 \
        buffer->cur++, result->type = THEN_TYPE; \
    }                                           \
  while (0)
879 /* Lex a token into pfile->cur_token, which is also incremented, to
880 get diagnostics pointing to the correct location.
882 Does not handle issues such as token lookahead, multiple-include
883 optimization, directives, skipping etc. This function is only
884 suitable for use by _cpp_lex_token, and in special cases like
885 lex_expansion_token which doesn't care for any of these issues.
887 When meeting a newline, returns CPP_EOF if parsing a directive,
888 otherwise returns to the start of the token buffer if permissible.
889 Returns the location of the lexed token. */
890 cpp_token *
891 _cpp_lex_direct (cpp_reader *pfile)
893 cppchar_t c;
894 cpp_buffer *buffer;
895 const unsigned char *comment_start;
896 cpp_token *result = pfile->cur_token++;
898 fresh_line:
899 result->flags = 0;
900 buffer = pfile->buffer;
901 if (buffer->need_line)
903 if (pfile->state.in_deferred_pragma)
905 result->type = CPP_PRAGMA_EOL;
906 pfile->state.in_deferred_pragma = false;
907 if (!pfile->state.pragma_allow_expansion)
908 pfile->state.prevent_expansion--;
909 return result;
911 if (!_cpp_get_fresh_line (pfile))
913 result->type = CPP_EOF;
914 if (!pfile->state.in_directive)
916 /* Tell the compiler the line number of the EOF token. */
917 result->src_loc = pfile->line_table->highest_line;
918 result->flags = BOL;
920 return result;
922 if (!pfile->keep_tokens)
924 pfile->cur_run = &pfile->base_run;
925 result = pfile->base_run.base;
926 pfile->cur_token = result + 1;
928 result->flags = BOL;
929 if (pfile->state.parsing_args == 2)
930 result->flags |= PREV_WHITE;
932 buffer = pfile->buffer;
933 update_tokens_line:
934 result->src_loc = pfile->line_table->highest_line;
936 skipped_white:
937 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
938 && !pfile->overlaid_buffer)
940 _cpp_process_line_notes (pfile, false);
941 result->src_loc = pfile->line_table->highest_line;
943 c = *buffer->cur++;
945 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
946 CPP_BUF_COLUMN (buffer, buffer->cur));
948 switch (c)
950 case ' ': case '\t': case '\f': case '\v': case '\0':
951 result->flags |= PREV_WHITE;
952 skip_whitespace (pfile, c);
953 goto skipped_white;
955 case '\n':
956 if (buffer->cur < buffer->rlimit)
957 CPP_INCREMENT_LINE (pfile, 0);
958 buffer->need_line = true;
959 goto fresh_line;
961 case '0': case '1': case '2': case '3': case '4':
962 case '5': case '6': case '7': case '8': case '9':
964 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
965 result->type = CPP_NUMBER;
966 lex_number (pfile, &result->val.str, &nst);
967 warn_about_normalization (pfile, result, &nst);
968 break;
971 case 'L':
972 case 'u':
973 case 'U':
974 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
975 if (c == 'L' || CPP_OPTION (pfile, uliterals))
977 if (*buffer->cur == '\'' || *buffer->cur == '"')
979 lex_string (pfile, result, buffer->cur - 1);
980 break;
983 /* Fall through. */
985 case '_':
986 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
987 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
988 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
989 case 's': case 't': case 'v': case 'w': case 'x':
990 case 'y': case 'z':
991 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
992 case 'G': case 'H': case 'I': case 'J': case 'K':
993 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
994 case 'S': case 'T': case 'V': case 'W': case 'X':
995 case 'Y': case 'Z':
996 result->type = CPP_NAME;
998 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
999 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1000 &nst);
1001 warn_about_normalization (pfile, result, &nst);
1004 /* Convert named operators to their proper types. */
1005 if (result->val.node->flags & NODE_OPERATOR)
1007 result->flags |= NAMED_OP;
1008 result->type = (enum cpp_ttype) result->val.node->directive_index;
1010 break;
1012 case '\'':
1013 case '"':
1014 lex_string (pfile, result, buffer->cur - 1);
1015 break;
1017 case '/':
1018 /* A potential block or line comment. */
1019 comment_start = buffer->cur;
1020 c = *buffer->cur;
1022 if (c == '*')
1024 if (_cpp_skip_block_comment (pfile))
1025 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1027 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1028 || cpp_in_system_header (pfile)))
1030 /* Warn about comments only if pedantically GNUC89, and not
1031 in system headers. */
1032 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1033 && ! buffer->warned_cplusplus_comments)
1035 cpp_error (pfile, CPP_DL_PEDWARN,
1036 "C++ style comments are not allowed in ISO C90");
1037 cpp_error (pfile, CPP_DL_PEDWARN,
1038 "(this will be reported only once per input file)");
1039 buffer->warned_cplusplus_comments = 1;
1042 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1043 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1045 else if (c == '=')
1047 buffer->cur++;
1048 result->type = CPP_DIV_EQ;
1049 break;
1051 else
1053 result->type = CPP_DIV;
1054 break;
1057 if (!pfile->state.save_comments)
1059 result->flags |= PREV_WHITE;
1060 goto update_tokens_line;
1063 /* Save the comment as a token in its own right. */
1064 save_comment (pfile, result, comment_start, c);
1065 break;
1067 case '<':
1068 if (pfile->state.angled_headers)
1070 lex_string (pfile, result, buffer->cur - 1);
1071 break;
1074 result->type = CPP_LESS;
1075 if (*buffer->cur == '=')
1076 buffer->cur++, result->type = CPP_LESS_EQ;
1077 else if (*buffer->cur == '<')
1079 buffer->cur++;
1080 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1082 else if (CPP_OPTION (pfile, digraphs))
1084 if (*buffer->cur == ':')
1086 buffer->cur++;
1087 result->flags |= DIGRAPH;
1088 result->type = CPP_OPEN_SQUARE;
1090 else if (*buffer->cur == '%')
1092 buffer->cur++;
1093 result->flags |= DIGRAPH;
1094 result->type = CPP_OPEN_BRACE;
1097 break;
1099 case '>':
1100 result->type = CPP_GREATER;
1101 if (*buffer->cur == '=')
1102 buffer->cur++, result->type = CPP_GREATER_EQ;
1103 else if (*buffer->cur == '>')
1105 buffer->cur++;
1106 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1108 break;
1110 case '%':
1111 result->type = CPP_MOD;
1112 if (*buffer->cur == '=')
1113 buffer->cur++, result->type = CPP_MOD_EQ;
1114 else if (CPP_OPTION (pfile, digraphs))
1116 if (*buffer->cur == ':')
1118 buffer->cur++;
1119 result->flags |= DIGRAPH;
1120 result->type = CPP_HASH;
1121 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1122 buffer->cur += 2, result->type = CPP_PASTE;
1124 else if (*buffer->cur == '>')
1126 buffer->cur++;
1127 result->flags |= DIGRAPH;
1128 result->type = CPP_CLOSE_BRACE;
1131 break;
1133 case '.':
1134 result->type = CPP_DOT;
1135 if (ISDIGIT (*buffer->cur))
1137 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1138 result->type = CPP_NUMBER;
1139 lex_number (pfile, &result->val.str, &nst);
1140 warn_about_normalization (pfile, result, &nst);
1142 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1143 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1144 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1145 buffer->cur++, result->type = CPP_DOT_STAR;
1146 break;
1148 case '+':
1149 result->type = CPP_PLUS;
1150 if (*buffer->cur == '+')
1151 buffer->cur++, result->type = CPP_PLUS_PLUS;
1152 else if (*buffer->cur == '=')
1153 buffer->cur++, result->type = CPP_PLUS_EQ;
1154 break;
1156 case '-':
1157 result->type = CPP_MINUS;
1158 if (*buffer->cur == '>')
1160 buffer->cur++;
1161 result->type = CPP_DEREF;
1162 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1163 buffer->cur++, result->type = CPP_DEREF_STAR;
1165 else if (*buffer->cur == '-')
1166 buffer->cur++, result->type = CPP_MINUS_MINUS;
1167 else if (*buffer->cur == '=')
1168 buffer->cur++, result->type = CPP_MINUS_EQ;
1169 break;
1171 case '&':
1172 result->type = CPP_AND;
1173 if (*buffer->cur == '&')
1174 buffer->cur++, result->type = CPP_AND_AND;
1175 else if (*buffer->cur == '=')
1176 buffer->cur++, result->type = CPP_AND_EQ;
1177 break;
1179 case '|':
1180 result->type = CPP_OR;
1181 if (*buffer->cur == '|')
1182 buffer->cur++, result->type = CPP_OR_OR;
1183 else if (*buffer->cur == '=')
1184 buffer->cur++, result->type = CPP_OR_EQ;
1185 break;
1187 case ':':
1188 result->type = CPP_COLON;
1189 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1190 buffer->cur++, result->type = CPP_SCOPE;
1191 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1193 buffer->cur++;
1194 result->flags |= DIGRAPH;
1195 result->type = CPP_CLOSE_SQUARE;
1197 break;
1199 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1200 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1201 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1202 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1203 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1205 case '?': result->type = CPP_QUERY; break;
1206 case '~': result->type = CPP_COMPL; break;
1207 case ',': result->type = CPP_COMMA; break;
1208 case '(': result->type = CPP_OPEN_PAREN; break;
1209 case ')': result->type = CPP_CLOSE_PAREN; break;
1210 case '[': result->type = CPP_OPEN_SQUARE; break;
1211 case ']': result->type = CPP_CLOSE_SQUARE; break;
1212 case '{': result->type = CPP_OPEN_BRACE; break;
1213 case '}': result->type = CPP_CLOSE_BRACE; break;
1214 case ';': result->type = CPP_SEMICOLON; break;
1216 /* @ is a punctuator in Objective-C. */
1217 case '@': result->type = CPP_ATSIGN; break;
1219 case '$':
1220 case '\\':
1222 const uchar *base = --buffer->cur;
1223 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1225 if (forms_identifier_p (pfile, true, &nst))
1227 result->type = CPP_NAME;
1228 result->val.node = lex_identifier (pfile, base, true, &nst);
1229 warn_about_normalization (pfile, result, &nst);
1230 break;
1232 buffer->cur++;
1235 default:
1236 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1237 break;
1240 return result;
1243 /* An upper bound on the number of bytes needed to spell TOKEN.
1244 Does not include preceding whitespace. */
1245 unsigned int
1246 cpp_token_len (const cpp_token *token)
1248 unsigned int len;
1250 switch (TOKEN_SPELL (token))
1252 default: len = 4; break;
1253 case SPELL_LITERAL: len = token->val.str.len; break;
1254 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1257 return len;
/* Decode one UTF-8 sequence starting at NAME and store the matching
   \U escape (backslash, 'U', eight lowercase hex digits) in BUFFER.
   Return the number of bytes consumed from NAME.  Exactly 10 bytes are
   always written to BUFFER, and no terminator is added.  Aborts if a
   trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *out = buffer;
  unsigned lead_bits = *src;
  unsigned long code_point;
  int seq_len = 0;
  int consumed;
  int shift;

  /* The number of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead_bits & 0x80)
    {
      seq_len++;
      lead_bits <<= 1;
    }

  /* Payload bits of the lead byte, then six more from each trailer.  */
  code_point = *src & (0x7F >> seq_len);
  for (consumed = 1; consumed < seq_len; consumed++)
    {
      unsigned char trailer = *++src;

      code_point = (code_point << 6) | (trailer & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((trailer & ~0x3F) != 0x80)
        abort ();
    }

  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1295 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1296 already contain the enough space to hold the token's spelling.
1297 Returns a pointer to the character after the last character written.
1298 FORSTRING is true if this is to be the spelling after translation
1299 phase 1 (this is different for UCNs).
1300 FIXME: Would be nice if we didn't need the PFILE argument. */
1301 unsigned char *
1302 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1303 unsigned char *buffer, bool forstring)
1305 switch (TOKEN_SPELL (token))
1307 case SPELL_OPERATOR:
1309 const unsigned char *spelling;
1310 unsigned char c;
1312 if (token->flags & DIGRAPH)
1313 spelling
1314 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1315 else if (token->flags & NAMED_OP)
1316 goto spell_ident;
1317 else
1318 spelling = TOKEN_NAME (token);
1320 while ((c = *spelling++) != '\0')
1321 *buffer++ = c;
1323 break;
1325 spell_ident:
1326 case SPELL_IDENT:
1327 if (forstring)
1329 memcpy (buffer, NODE_NAME (token->val.node),
1330 NODE_LEN (token->val.node));
1331 buffer += NODE_LEN (token->val.node);
1333 else
1335 size_t i;
1336 const unsigned char * name = NODE_NAME (token->val.node);
1338 for (i = 0; i < NODE_LEN (token->val.node); i++)
1339 if (name[i] & ~0x7F)
1341 i += utf8_to_ucn (buffer, name + i) - 1;
1342 buffer += 10;
1344 else
1345 *buffer++ = NODE_NAME (token->val.node)[i];
1347 break;
1349 case SPELL_LITERAL:
1350 memcpy (buffer, token->val.str.text, token->val.str.len);
1351 buffer += token->val.str.len;
1352 break;
1354 case SPELL_NONE:
1355 cpp_error (pfile, CPP_DL_ICE,
1356 "unspellable token %s", TOKEN_NAME (token));
1357 break;
1360 return buffer;
1363 /* Returns TOKEN spelt as a null-terminated string. The string is
1364 freed when the reader is destroyed. Useful for diagnostics. */
1365 unsigned char *
1366 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1368 unsigned int len = cpp_token_len (token) + 1;
1369 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1371 end = cpp_spell_token (pfile, token, start, false);
1372 end[0] = '\0';
1374 return start;
1377 /* Used by C front ends, which really should move to using
1378 cpp_token_as_text. */
1379 const char *
1380 cpp_type2name (enum cpp_ttype type)
1382 return (const char *) token_spellings[type].name;
1385 /* Writes the spelling of token to FP, without any preceding space.
1386 Separated from cpp_spell_token for efficiency - to avoid stdio
1387 double-buffering. */
1388 void
1389 cpp_output_token (const cpp_token *token, FILE *fp)
1391 switch (TOKEN_SPELL (token))
1393 case SPELL_OPERATOR:
1395 const unsigned char *spelling;
1396 int c;
1398 if (token->flags & DIGRAPH)
1399 spelling
1400 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1401 else if (token->flags & NAMED_OP)
1402 goto spell_ident;
1403 else
1404 spelling = TOKEN_NAME (token);
1406 c = *spelling;
1408 putc (c, fp);
1409 while ((c = *++spelling) != '\0');
1411 break;
1413 spell_ident:
1414 case SPELL_IDENT:
1416 size_t i;
1417 const unsigned char * name = NODE_NAME (token->val.node);
1419 for (i = 0; i < NODE_LEN (token->val.node); i++)
1420 if (name[i] & ~0x7F)
1422 unsigned char buffer[10];
1423 i += utf8_to_ucn (buffer, name + i) - 1;
1424 fwrite (buffer, 1, 10, fp);
1426 else
1427 fputc (NODE_NAME (token->val.node)[i], fp);
1429 break;
1431 case SPELL_LITERAL:
1432 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1433 break;
1435 case SPELL_NONE:
1436 /* An error, most probably. */
1437 break;
1441 /* Compare two tokens. */
1443 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1445 if (a->type == b->type && a->flags == b->flags)
1446 switch (TOKEN_SPELL (a))
1448 default: /* Keep compiler happy. */
1449 case SPELL_OPERATOR:
1450 return 1;
1451 case SPELL_NONE:
1452 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1453 case SPELL_IDENT:
1454 return a->val.node == b->val.node;
1455 case SPELL_LITERAL:
1456 return (a->val.str.len == b->val.str.len
1457 && !memcmp (a->val.str.text, b->val.str.text,
1458 a->val.str.len));
1461 return 0;
1464 /* Returns nonzero if a space should be inserted to avoid an
1465 accidental token paste for output. For simplicity, it is
1466 conservative, and occasionally advises a space where one is not
1467 needed, e.g. "." and ".2". */
1469 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1470 const cpp_token *token2)
1472 enum cpp_ttype a = token1->type, b = token2->type;
1473 cppchar_t c;
1475 if (token1->flags & NAMED_OP)
1476 a = CPP_NAME;
1477 if (token2->flags & NAMED_OP)
1478 b = CPP_NAME;
1480 c = EOF;
1481 if (token2->flags & DIGRAPH)
1482 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1483 else if (token_spellings[b].category == SPELL_OPERATOR)
1484 c = token_spellings[b].name[0];
1486 /* Quickly get everything that can paste with an '='. */
1487 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1488 return 1;
1490 switch (a)
1492 case CPP_GREATER: return c == '>';
1493 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1494 case CPP_PLUS: return c == '+';
1495 case CPP_MINUS: return c == '-' || c == '>';
1496 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1497 case CPP_MOD: return c == ':' || c == '>';
1498 case CPP_AND: return c == '&';
1499 case CPP_OR: return c == '|';
1500 case CPP_COLON: return c == ':' || c == '>';
1501 case CPP_DEREF: return c == '*';
1502 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1503 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1504 case CPP_NAME: return ((b == CPP_NUMBER
1505 && name_p (pfile, &token2->val.str))
1506 || b == CPP_NAME
1507 || b == CPP_CHAR || b == CPP_STRING); /* L */
1508 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1509 || c == '.' || c == '+' || c == '-');
1510 /* UCNs */
1511 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1512 && b == CPP_NAME)
1513 || (CPP_OPTION (pfile, objc)
1514 && token1->val.str.text[0] == '@'
1515 && (b == CPP_NAME || b == CPP_STRING)));
1516 default: break;
1519 return 0;
1522 /* Output all the remaining tokens on the current line, and a newline
1523 character, to FP. Leading whitespace is removed. If there are
1524 macros, special token padding is not performed. */
1525 void
1526 cpp_output_line (cpp_reader *pfile, FILE *fp)
1528 const cpp_token *token;
1530 token = cpp_get_token (pfile);
1531 while (token->type != CPP_EOF)
1533 cpp_output_token (token, fp);
1534 token = cpp_get_token (pfile);
1535 if (token->flags & PREV_WHITE)
1536 putc (' ', fp);
1539 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   distance from BUFF->cur to BUFF->limit.  Every macro argument is
   parenthesized so that any expression may be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1557 /* Create a new allocation buffer. Place the control block at the end
1558 of the buffer, so that buffer overflows will cause immediate chaos. */
1559 static _cpp_buff *
1560 new_buff (size_t len)
1562 _cpp_buff *result;
1563 unsigned char *base;
1565 if (len < MIN_BUFF_SIZE)
1566 len = MIN_BUFF_SIZE;
1567 len = CPP_ALIGN (len);
1569 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1570 result = (_cpp_buff *) (base + len);
1571 result->base = base;
1572 result->cur = base;
1573 result->limit = base + len;
1574 result->next = NULL;
1575 return result;
1578 /* Place a chain of unwanted allocation buffers on the free list. */
1579 void
1580 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1582 _cpp_buff *end = buff;
1584 while (end->next)
1585 end = end->next;
1586 end->next = pfile->free_buffs;
1587 pfile->free_buffs = buff;
1590 /* Return a free buffer of size at least MIN_SIZE. */
1591 _cpp_buff *
1592 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1594 _cpp_buff *result, **p;
1596 for (p = &pfile->free_buffs;; p = &(*p)->next)
1598 size_t size;
1600 if (*p == NULL)
1601 return new_buff (min_size);
1602 result = *p;
1603 size = result->limit - result->base;
1604 /* Return a buffer that's big enough, but don't waste one that's
1605 way too big. */
1606 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1607 break;
1610 *p = result->next;
1611 result->next = NULL;
1612 result->cur = result->base;
1613 return result;
1616 /* Creates a new buffer with enough space to hold the uncommitted
1617 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1618 the excess bytes to the new buffer. Chains the new buffer after
1619 BUFF, and returns the new buffer. */
1620 _cpp_buff *
1621 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1623 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1624 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1626 buff->next = new_buff;
1627 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1628 return new_buff;
1631 /* Creates a new buffer with enough space to hold the uncommitted
1632 remaining bytes of the buffer pointed to by BUFF, and at least
1633 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1634 Chains the new buffer before the buffer pointed to by BUFF, and
1635 updates the pointer to point to the new buffer. */
1636 void
1637 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1639 _cpp_buff *new_buff, *old_buff = *pbuff;
1640 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1642 new_buff = _cpp_get_buff (pfile, size);
1643 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1644 new_buff->next = old_buff;
1645 *pbuff = new_buff;
1648 /* Free a chain of buffers starting at BUFF. */
1649 void
1650 _cpp_free_buff (_cpp_buff *buff)
1652 _cpp_buff *next;
1654 for (; buff; buff = next)
1656 next = buff->next;
1657 free (buff->base);
1661 /* Allocate permanent, unaligned storage of length LEN. */
1662 unsigned char *
1663 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1665 _cpp_buff *buff = pfile->u_buff;
1666 unsigned char *result = buff->cur;
1668 if (len > (size_t) (buff->limit - result))
1670 buff = _cpp_get_buff (pfile, len);
1671 buff->next = pfile->u_buff;
1672 pfile->u_buff = buff;
1673 result = buff->cur;
1676 buff->cur = result + len;
1677 return result;
1680 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1681 That buffer is used for growing allocations when saving macro
1682 replacement lists in a #define, and when parsing an answer to an
1683 assertion in #assert, #unassert or #if (and therefore possibly
1684 whilst expanding macros). It therefore must not be used by any
1685 code that they might call: specifically the lexer and the guts of
1686 the macro expander.
1688 All existing other uses clearly fit this restriction: storing
1689 registered pragmas during initialization. */
1690 unsigned char *
1691 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1693 _cpp_buff *buff = pfile->a_buff;
1694 unsigned char *result = buff->cur;
1696 if (len > (size_t) (buff->limit - result))
1698 buff = _cpp_get_buff (pfile, len);
1699 buff->next = pfile->a_buff;
1700 pfile->a_buff = buff;
1701 result = buff->cur;
1704 buff->cur = result + len;
1705 return result;
1708 /* Say which field of TOK is in use. */
1710 enum cpp_token_fld_kind
1711 cpp_token_val_index (cpp_token *tok)
1713 switch (TOKEN_SPELL (tok))
1715 case SPELL_IDENT:
1716 return CPP_TOKEN_FLD_NODE;
1717 case SPELL_LITERAL:
1718 return CPP_TOKEN_FLD_STR;
1719 case SPELL_NONE:
1720 if (tok->type == CPP_MACRO_ARG)
1721 return CPP_TOKEN_FLD_ARG_NO;
1722 else if (tok->type == CPP_PADDING)
1723 return CPP_TOKEN_FLD_SOURCE;
1724 else if (tok->type == CPP_PRAGMA)
1725 return CPP_TOKEN_FLD_PRAGMA;
1726 /* else fall through */
1727 default:
1728 return CPP_TOKEN_FLD_NONE;