PR target/35485
[official-gcc.git] / libcpp / lex.c
blob57364f00bb98da1f3fa3085973450dc536d12699
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* How a token's spelling is obtained.  The enumerator names are pasted
   together by the TK macro (SPELL_ ## s) when the token spelling table
   is built, so they must not be renamed.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
68 /* Utility routine:
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
76 return 0;
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
100 void
101 _cpp_clean_line (cpp_reader *pfile)
103 cpp_buffer *buffer;
104 const uchar *s;
105 uchar c, *d, *p;
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 const uchar *pbackslash = NULL;
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
122 c = *++s;
123 if (__builtin_expect (c == '\n', false)
124 || __builtin_expect (c == '\r', false))
126 d = (uchar *) s;
128 if (__builtin_expect (s == buffer->rlimit, false))
129 goto done;
131 /* DOS line ending? */
132 if (__builtin_expect (c == '\r', false)
133 && s[1] == '\n')
135 s++;
136 if (s == buffer->rlimit)
137 goto done;
140 if (__builtin_expect (pbackslash == NULL, true))
141 goto done;
143 /* Check for escaped newline. */
144 p = d;
145 while (is_nvspace (p[-1]))
146 p--;
147 if (p - 1 != pbackslash)
148 goto done;
150 /* Have an escaped newline; process it and proceed to
151 the slow path. */
152 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
153 d = p - 2;
154 buffer->next_line = p - 1;
155 break;
157 if (__builtin_expect (c == '\\', false))
158 pbackslash = s;
159 else if (__builtin_expect (c == '?', false)
160 && __builtin_expect (s[1] == '?', false)
161 && _cpp_trigraph_map[s[2]])
163 /* Have a trigraph. We may or may not have to convert
164 it. Add a line note regardless, for -Wtrigraphs. */
165 add_line_note (buffer, s, s[2]);
166 if (CPP_OPTION (pfile, trigraphs))
168 /* We do, and that means we have to switch to the
169 slow path. */
170 d = (uchar *) s;
171 *d = _cpp_trigraph_map[s[2]];
172 s += 2;
173 break;
179 for (;;)
181 c = *++s;
182 *++d = c;
184 if (c == '\n' || c == '\r')
186 /* Handle DOS line endings. */
187 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
188 s++;
189 if (s == buffer->rlimit)
190 break;
192 /* Escaped? */
193 p = d;
194 while (p != buffer->next_line && is_nvspace (p[-1]))
195 p--;
196 if (p == buffer->next_line || p[-1] != '\\')
197 break;
199 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
200 d = p - 2;
201 buffer->next_line = p - 1;
203 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 /* Add a note regardless, for the benefit of -Wtrigraphs. */
206 add_line_note (buffer, d, s[2]);
207 if (CPP_OPTION (pfile, trigraphs))
209 *d = _cpp_trigraph_map[s[2]];
210 s += 2;
215 else
218 s++;
219 while (*s != '\n' && *s != '\r');
220 d = (uchar *) s;
222 /* Handle DOS line endings. */
223 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
224 s++;
227 done:
228 *d = '\n';
229 /* A sentinel note that should never be processed. */
230 add_line_note (buffer, d + 1, '\n');
231 buffer->next_line = s + 1;
234 /* Return true if the trigraph indicated by NOTE should be warned
235 about in a comment. */
236 static bool
237 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
239 const uchar *p;
241 /* Within comments we don't warn about trigraphs, unless the
242 trigraph forms an escaped newline, as that may change
243 behavior. */
244 if (note->type != '/')
245 return false;
247 /* If -trigraphs, then this was an escaped newline iff the next note
248 is coincident. */
249 if (CPP_OPTION (pfile, trigraphs))
250 return note[1].pos == note->pos;
252 /* Otherwise, see if this forms an escaped newline. */
253 p = note->pos + 3;
254 while (is_nvspace (*p))
255 p++;
257 /* There might have been escaped newlines between the trigraph and the
258 newline we found. Hence the position test. */
259 return (*p == '\n' && p < note[1].pos);
262 /* Process the notes created by add_line_note as far as the current
263 location. */
264 void
265 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267 cpp_buffer *buffer = pfile->buffer;
269 for (;;)
271 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
272 unsigned int col;
274 if (note->pos > buffer->cur)
275 break;
277 buffer->cur_note++;
278 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280 if (note->type == '\\' || note->type == ' ')
282 if (note->type == ' ' && !in_comment)
283 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
284 "backslash and newline separated by space");
286 if (buffer->next_line > buffer->rlimit)
288 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
289 "backslash-newline at end of file");
290 /* Prevent "no newline at end of file" warning. */
291 buffer->next_line = buffer->rlimit;
294 buffer->line_base = note->pos;
295 CPP_INCREMENT_LINE (pfile, 0);
297 else if (_cpp_trigraph_map[note->type])
299 if (CPP_OPTION (pfile, warn_trigraphs)
300 && (!in_comment || warn_in_comment (pfile, note)))
302 if (CPP_OPTION (pfile, trigraphs))
303 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
304 "trigraph ??%c converted to %c",
305 note->type,
306 (int) _cpp_trigraph_map[note->type]);
307 else
309 cpp_error_with_line
310 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
311 "trigraph ??%c ignored, use -trigraphs to enable",
312 note->type);
316 else
317 abort ();
321 /* Skip a C-style block comment. We find the end of the comment by
322 seeing if an asterisk is before every '/' we encounter. Returns
323 nonzero if comment terminated by EOF, zero otherwise.
325 Buffer->cur points to the initial asterisk of the comment. */
326 bool
327 _cpp_skip_block_comment (cpp_reader *pfile)
329 cpp_buffer *buffer = pfile->buffer;
330 const uchar *cur = buffer->cur;
331 uchar c;
333 cur++;
334 if (*cur == '/')
335 cur++;
337 for (;;)
339 /* People like decorating comments with '*', so check for '/'
340 instead for efficiency. */
341 c = *cur++;
343 if (c == '/')
345 if (cur[-2] == '*')
346 break;
348 /* Warn about potential nested comments, but not if the '/'
349 comes immediately before the true comment delimiter.
350 Don't bother to get it right across escaped newlines. */
351 if (CPP_OPTION (pfile, warn_comments)
352 && cur[0] == '*' && cur[1] != '/')
354 buffer->cur = cur;
355 cpp_error_with_line (pfile, CPP_DL_WARNING,
356 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
357 "\"/*\" within comment");
360 else if (c == '\n')
362 unsigned int cols;
363 buffer->cur = cur - 1;
364 _cpp_process_line_notes (pfile, true);
365 if (buffer->next_line >= buffer->rlimit)
366 return true;
367 _cpp_clean_line (pfile);
369 cols = buffer->next_line - buffer->line_base;
370 CPP_INCREMENT_LINE (pfile, cols);
372 cur = buffer->cur;
376 buffer->cur = cur;
377 _cpp_process_line_notes (pfile, true);
378 return false;
381 /* Skip a C++ line comment, leaving buffer->cur pointing to the
382 terminating newline. Handles escaped newlines. Returns nonzero
383 if a multiline comment. */
384 static int
385 skip_line_comment (cpp_reader *pfile)
387 cpp_buffer *buffer = pfile->buffer;
388 source_location orig_line = pfile->line_table->highest_line;
390 while (*buffer->cur != '\n')
391 buffer->cur++;
393 _cpp_process_line_notes (pfile, true);
394 return orig_line != pfile->line_table->highest_line;
397 /* Skips whitespace, saving the next non-whitespace character. */
398 static void
399 skip_whitespace (cpp_reader *pfile, cppchar_t c)
401 cpp_buffer *buffer = pfile->buffer;
402 bool saw_NUL = false;
406 /* Horizontal space always OK. */
407 if (c == ' ' || c == '\t')
409 /* Just \f \v or \0 left. */
410 else if (c == '\0')
411 saw_NUL = true;
412 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
413 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
414 CPP_BUF_COL (buffer),
415 "%s in preprocessing directive",
416 c == '\f' ? "form feed" : "vertical tab");
418 c = *buffer->cur++;
420 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
421 while (is_nvspace (c));
423 if (saw_NUL)
424 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
426 buffer->cur--;
429 /* See if the characters of a number token are valid in a name (no
430 '.', '+' or '-'). */
431 static int
432 name_p (cpp_reader *pfile, const cpp_string *string)
434 unsigned int i;
436 for (i = 0; i < string->len; i++)
437 if (!is_idchar (string->text[i]))
438 return 0;
440 return 1;
443 /* After parsing an identifier or other sequence, produce a warning about
444 sequences not in NFC/NFKC. */
445 static void
446 warn_about_normalization (cpp_reader *pfile,
447 const cpp_token *token,
448 const struct normalize_state *s)
450 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
451 && !pfile->state.skipping)
453 /* Make sure that the token is printed using UCNs, even
454 if we'd otherwise happily print UTF-8. */
455 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
456 size_t sz;
458 sz = cpp_spell_token (pfile, token, buf, false) - buf;
459 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
460 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
461 "`%.*s' is not in NFKC", (int) sz, buf);
462 else
463 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464 "`%.*s' is not in NFC", (int) sz, buf);
468 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
469 an identifier. FIRST is TRUE if this starts an identifier. */
470 static bool
471 forms_identifier_p (cpp_reader *pfile, int first,
472 struct normalize_state *state)
474 cpp_buffer *buffer = pfile->buffer;
476 if (*buffer->cur == '$')
478 if (!CPP_OPTION (pfile, dollars_in_ident))
479 return false;
481 buffer->cur++;
482 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
484 CPP_OPTION (pfile, warn_dollars) = 0;
485 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
488 return true;
491 /* Is this a syntactically valid UCN? */
492 if (CPP_OPTION (pfile, extended_identifiers)
493 && *buffer->cur == '\\'
494 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
496 buffer->cur += 2;
497 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
498 state))
499 return true;
500 buffer->cur -= 2;
503 return false;
506 /* Lex an identifier starting at BUFFER->CUR - 1. */
507 static cpp_hashnode *
508 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
509 struct normalize_state *nst)
511 cpp_hashnode *result;
512 const uchar *cur;
513 unsigned int len;
514 unsigned int hash = HT_HASHSTEP (0, *base);
516 cur = pfile->buffer->cur;
517 if (! starts_ucn)
518 while (ISIDNUM (*cur))
520 hash = HT_HASHSTEP (hash, *cur);
521 cur++;
523 pfile->buffer->cur = cur;
524 if (starts_ucn || forms_identifier_p (pfile, false, nst))
526 /* Slower version for identifiers containing UCNs (or $). */
527 do {
528 while (ISIDNUM (*pfile->buffer->cur))
530 pfile->buffer->cur++;
531 NORMALIZE_STATE_UPDATE_IDNUM (nst);
533 } while (forms_identifier_p (pfile, false, nst));
534 result = _cpp_interpret_identifier (pfile, base,
535 pfile->buffer->cur - base);
537 else
539 len = cur - base;
540 hash = HT_HASHFINISH (hash, len);
542 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
543 base, len, hash, HT_ALLOC));
546 /* Rarely, identifiers require diagnostics when lexed. */
547 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
548 && !pfile->state.skipping, 0))
550 /* It is allowed to poison the same identifier twice. */
551 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
552 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
553 NODE_NAME (result));
555 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
556 replacement list of a variadic macro. */
557 if (result == pfile->spec_nodes.n__VA_ARGS__
558 && !pfile->state.va_args_ok)
559 cpp_error (pfile, CPP_DL_PEDWARN,
560 "__VA_ARGS__ can only appear in the expansion"
561 " of a C99 variadic macro");
564 return result;
567 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
568 static void
569 lex_number (cpp_reader *pfile, cpp_string *number,
570 struct normalize_state *nst)
572 const uchar *cur;
573 const uchar *base;
574 uchar *dest;
576 base = pfile->buffer->cur - 1;
579 cur = pfile->buffer->cur;
581 /* N.B. ISIDNUM does not include $. */
582 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
584 cur++;
585 NORMALIZE_STATE_UPDATE_IDNUM (nst);
588 pfile->buffer->cur = cur;
590 while (forms_identifier_p (pfile, false, nst));
592 number->len = cur - base;
593 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
594 memcpy (dest, base, number->len);
595 dest[number->len] = '\0';
596 number->text = dest;
599 /* Create a token of type TYPE with a literal spelling. */
600 static void
601 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
602 unsigned int len, enum cpp_ttype type)
604 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
606 memcpy (dest, base, len);
607 dest[len] = '\0';
608 token->type = type;
609 token->val.str.len = len;
610 token->val.str.text = dest;
613 /* Lexes a string, character constant, or angle-bracketed header file
614 name. The stored string contains the spelling, including opening
615 quote and leading any leading 'L', 'u' or 'U'. It returns the type
616 of the literal, or CPP_OTHER if it was not properly terminated.
618 The spelling is NUL-terminated, but it is not guaranteed that this
619 is the first NUL since embedded NULs are preserved. */
620 static void
621 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
623 bool saw_NUL = false;
624 const uchar *cur;
625 cppchar_t terminator;
626 enum cpp_ttype type;
628 cur = base;
629 terminator = *cur++;
630 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
631 terminator = *cur++;
632 if (terminator == '\"')
633 type = (*base == 'L' ? CPP_WSTRING :
634 *base == 'U' ? CPP_STRING32 :
635 *base == 'u' ? CPP_STRING16 : CPP_STRING);
636 else if (terminator == '\'')
637 type = (*base == 'L' ? CPP_WCHAR :
638 *base == 'U' ? CPP_CHAR32 :
639 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
640 else
641 terminator = '>', type = CPP_HEADER_NAME;
643 for (;;)
645 cppchar_t c = *cur++;
647 /* In #include-style directives, terminators are not escapable. */
648 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
649 cur++;
650 else if (c == terminator)
651 break;
652 else if (c == '\n')
654 cur--;
655 type = CPP_OTHER;
656 break;
658 else if (c == '\0')
659 saw_NUL = true;
662 if (saw_NUL && !pfile->state.skipping)
663 cpp_error (pfile, CPP_DL_WARNING,
664 "null character(s) preserved in literal");
666 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
667 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
668 (int) terminator);
670 pfile->buffer->cur = cur;
671 create_literal (pfile, token, base, cur - base, type);
674 /* Return the comment table. The client may not make any assumption
675 about the ordering of the table. */
676 cpp_comment_table *
677 cpp_get_comments (cpp_reader *pfile)
679 return &pfile->comments;
682 /* Append a comment to the end of the comment table. */
683 static void
684 store_comment (cpp_reader *pfile, cpp_token *token)
686 int len;
688 if (pfile->comments.allocated == 0)
690 pfile->comments.allocated = 256;
691 pfile->comments.entries = (cpp_comment *) xmalloc
692 (pfile->comments.allocated * sizeof (cpp_comment));
695 if (pfile->comments.count == pfile->comments.allocated)
697 pfile->comments.allocated *= 2;
698 pfile->comments.entries = (cpp_comment *) xrealloc
699 (pfile->comments.entries,
700 pfile->comments.allocated * sizeof (cpp_comment));
703 len = token->val.str.len;
705 /* Copy comment. Note, token may not be NULL terminated. */
706 pfile->comments.entries[pfile->comments.count].comment =
707 (char *) xmalloc (sizeof (char) * (len + 1));
708 memcpy (pfile->comments.entries[pfile->comments.count].comment,
709 token->val.str.text, len);
710 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
712 /* Set source location. */
713 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
715 /* Increment the count of entries in the comment table. */
716 pfile->comments.count++;
719 /* The stored comment includes the comment start and any terminator. */
720 static void
721 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
722 cppchar_t type)
724 unsigned char *buffer;
725 unsigned int len, clen;
727 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
729 /* C++ comments probably (not definitely) have moved past a new
730 line, which we don't want to save in the comment. */
731 if (is_vspace (pfile->buffer->cur[-1]))
732 len--;
734 /* If we are currently in a directive, then we need to store all
735 C++ comments as C comments internally, and so we need to
736 allocate a little extra space in that case.
738 Note that the only time we encounter a directive here is
739 when we are saving comments in a "#define". */
740 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
742 buffer = _cpp_unaligned_alloc (pfile, clen);
744 token->type = CPP_COMMENT;
745 token->val.str.len = clen;
746 token->val.str.text = buffer;
748 buffer[0] = '/';
749 memcpy (buffer + 1, from, len - 1);
751 /* Finish conversion to a C comment, if necessary. */
752 if (pfile->state.in_directive && type == '/')
754 buffer[1] = '*';
755 buffer[clen - 2] = '*';
756 buffer[clen - 1] = '/';
759 /* Finally store this comment for use by clients of libcpp. */
760 store_comment (pfile, token);
763 /* Allocate COUNT tokens for RUN. */
764 void
765 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
767 run->base = XNEWVEC (cpp_token, count);
768 run->limit = run->base + count;
769 run->next = NULL;
772 /* Returns the next tokenrun, or creates one if there is none. */
773 static tokenrun *
774 next_tokenrun (tokenrun *run)
776 if (run->next == NULL)
778 run->next = XNEW (tokenrun);
779 run->next->prev = run;
780 _cpp_init_tokenrun (run->next, 250);
783 return run->next;
786 /* Look ahead in the input stream. */
787 const cpp_token *
788 cpp_peek_token (cpp_reader *pfile, int index)
790 cpp_context *context = pfile->context;
791 const cpp_token *peektok;
792 int count;
794 /* First, scan through any pending cpp_context objects. */
795 while (context->prev)
797 ptrdiff_t sz = (context->direct_p
798 ? LAST (context).token - FIRST (context).token
799 : LAST (context).ptoken - FIRST (context).ptoken);
801 if (index < (int) sz)
802 return (context->direct_p
803 ? FIRST (context).token + index
804 : *(FIRST (context).ptoken + index));
806 index -= (int) sz;
807 context = context->prev;
810 /* We will have to read some new tokens after all (and do so
811 without invalidating preceding tokens). */
812 count = index;
813 pfile->keep_tokens++;
817 peektok = _cpp_lex_token (pfile);
818 if (peektok->type == CPP_EOF)
819 return peektok;
821 while (index--);
823 _cpp_backup_tokens_direct (pfile, count + 1);
824 pfile->keep_tokens--;
826 return peektok;
829 /* Allocate a single token that is invalidated at the same time as the
830 rest of the tokens on the line. Has its line and col set to the
831 same as the last lexed token, so that diagnostics appear in the
832 right place. */
833 cpp_token *
834 _cpp_temp_token (cpp_reader *pfile)
836 cpp_token *old, *result;
837 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
838 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
840 old = pfile->cur_token - 1;
841 /* Any pre-existing lookaheads must not be clobbered. */
842 if (la)
844 if (sz <= la)
846 tokenrun *next = next_tokenrun (pfile->cur_run);
848 if (sz < la)
849 memmove (next->base + 1, next->base,
850 (la - sz) * sizeof (cpp_token));
852 next->base[0] = pfile->cur_run->limit[-1];
855 if (sz > 1)
856 memmove (pfile->cur_token + 1, pfile->cur_token,
857 MIN (la, sz - 1) * sizeof (cpp_token));
860 if (!sz && pfile->cur_token == pfile->cur_run->limit)
862 pfile->cur_run = next_tokenrun (pfile->cur_run);
863 pfile->cur_token = pfile->cur_run->base;
866 result = pfile->cur_token++;
867 result->src_loc = old->src_loc;
868 return result;
871 /* Lex a token into RESULT (external interface). Takes care of issues
872 like directive handling, token lookahead, multiple include
873 optimization and skipping. */
874 const cpp_token *
875 _cpp_lex_token (cpp_reader *pfile)
877 cpp_token *result;
879 for (;;)
881 if (pfile->cur_token == pfile->cur_run->limit)
883 pfile->cur_run = next_tokenrun (pfile->cur_run);
884 pfile->cur_token = pfile->cur_run->base;
886 /* We assume that the current token is somewhere in the current
887 run. */
888 if (pfile->cur_token < pfile->cur_run->base
889 || pfile->cur_token >= pfile->cur_run->limit)
890 abort ();
892 if (pfile->lookaheads)
894 pfile->lookaheads--;
895 result = pfile->cur_token++;
897 else
898 result = _cpp_lex_direct (pfile);
900 if (result->flags & BOL)
902 /* Is this a directive. If _cpp_handle_directive returns
903 false, it is an assembler #. */
904 if (result->type == CPP_HASH
905 /* 6.10.3 p 11: Directives in a list of macro arguments
906 gives undefined behavior. This implementation
907 handles the directive as normal. */
908 && pfile->state.parsing_args != 1)
910 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
912 if (pfile->directive_result.type == CPP_PADDING)
913 continue;
914 result = &pfile->directive_result;
917 else if (pfile->state.in_deferred_pragma)
918 result = &pfile->directive_result;
920 if (pfile->cb.line_change && !pfile->state.skipping)
921 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
924 /* We don't skip tokens in directives. */
925 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
926 break;
928 /* Outside a directive, invalidate controlling macros. At file
929 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
930 get here and MI optimization works. */
931 pfile->mi_valid = false;
933 if (!pfile->state.skipping || result->type == CPP_EOF)
934 break;
937 return result;
940 /* Returns true if a fresh line has been loaded. */
941 bool
942 _cpp_get_fresh_line (cpp_reader *pfile)
944 int return_at_eof;
946 /* We can't get a new line until we leave the current directive. */
947 if (pfile->state.in_directive)
948 return false;
950 for (;;)
952 cpp_buffer *buffer = pfile->buffer;
954 if (!buffer->need_line)
955 return true;
957 if (buffer->next_line < buffer->rlimit)
959 _cpp_clean_line (pfile);
960 return true;
963 /* First, get out of parsing arguments state. */
964 if (pfile->state.parsing_args)
965 return false;
967 /* End of buffer. Non-empty files should end in a newline. */
968 if (buffer->buf != buffer->rlimit
969 && buffer->next_line > buffer->rlimit
970 && !buffer->from_stage3)
972 /* Clip to buffer size. */
973 buffer->next_line = buffer->rlimit;
976 return_at_eof = buffer->return_at_eof;
977 _cpp_pop_buffer (pfile);
978 if (pfile->buffer == NULL || return_at_eof)
979 return false;
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE and
   consume nothing.  Relies on variables named `result' and `buffer'
   being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
992 /* Lex a token into pfile->cur_token, which is also incremented, to
993 get diagnostics pointing to the correct location.
995 Does not handle issues such as token lookahead, multiple-include
996 optimization, directives, skipping etc. This function is only
997 suitable for use by _cpp_lex_token, and in special cases like
998 lex_expansion_token which doesn't care for any of these issues.
1000 When meeting a newline, returns CPP_EOF if parsing a directive,
1001 otherwise returns to the start of the token buffer if permissible.
1002 Returns the location of the lexed token. */
1003 cpp_token *
1004 _cpp_lex_direct (cpp_reader *pfile)
1006 cppchar_t c;
1007 cpp_buffer *buffer;
1008 const unsigned char *comment_start;
1009 cpp_token *result = pfile->cur_token++;
1011 fresh_line:
1012 result->flags = 0;
1013 buffer = pfile->buffer;
1014 if (buffer->need_line)
1016 if (pfile->state.in_deferred_pragma)
1018 result->type = CPP_PRAGMA_EOL;
1019 pfile->state.in_deferred_pragma = false;
1020 if (!pfile->state.pragma_allow_expansion)
1021 pfile->state.prevent_expansion--;
1022 return result;
1024 if (!_cpp_get_fresh_line (pfile))
1026 result->type = CPP_EOF;
1027 if (!pfile->state.in_directive)
1029 /* Tell the compiler the line number of the EOF token. */
1030 result->src_loc = pfile->line_table->highest_line;
1031 result->flags = BOL;
1033 return result;
1035 if (!pfile->keep_tokens)
1037 pfile->cur_run = &pfile->base_run;
1038 result = pfile->base_run.base;
1039 pfile->cur_token = result + 1;
1041 result->flags = BOL;
1042 if (pfile->state.parsing_args == 2)
1043 result->flags |= PREV_WHITE;
1045 buffer = pfile->buffer;
1046 update_tokens_line:
1047 result->src_loc = pfile->line_table->highest_line;
1049 skipped_white:
1050 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1051 && !pfile->overlaid_buffer)
1053 _cpp_process_line_notes (pfile, false);
1054 result->src_loc = pfile->line_table->highest_line;
1056 c = *buffer->cur++;
1058 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1059 CPP_BUF_COLUMN (buffer, buffer->cur));
1061 switch (c)
1063 case ' ': case '\t': case '\f': case '\v': case '\0':
1064 result->flags |= PREV_WHITE;
1065 skip_whitespace (pfile, c);
1066 goto skipped_white;
1068 case '\n':
1069 if (buffer->cur < buffer->rlimit)
1070 CPP_INCREMENT_LINE (pfile, 0);
1071 buffer->need_line = true;
1072 goto fresh_line;
1074 case '0': case '1': case '2': case '3': case '4':
1075 case '5': case '6': case '7': case '8': case '9':
1077 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1078 result->type = CPP_NUMBER;
1079 lex_number (pfile, &result->val.str, &nst);
1080 warn_about_normalization (pfile, result, &nst);
1081 break;
1084 case 'L':
1085 case 'u':
1086 case 'U':
1087 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1088 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1090 if (*buffer->cur == '\'' || *buffer->cur == '"')
1092 lex_string (pfile, result, buffer->cur - 1);
1093 break;
1096 /* Fall through. */
1098 case '_':
1099 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1100 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1101 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1102 case 's': case 't': case 'v': case 'w': case 'x':
1103 case 'y': case 'z':
1104 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1105 case 'G': case 'H': case 'I': case 'J': case 'K':
1106 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1107 case 'S': case 'T': case 'V': case 'W': case 'X':
1108 case 'Y': case 'Z':
1109 result->type = CPP_NAME;
1111 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1112 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1113 &nst);
1114 warn_about_normalization (pfile, result, &nst);
1117 /* Convert named operators to their proper types. */
1118 if (result->val.node->flags & NODE_OPERATOR)
1120 result->flags |= NAMED_OP;
1121 result->type = (enum cpp_ttype) result->val.node->directive_index;
1123 break;
1125 case '\'':
1126 case '"':
1127 lex_string (pfile, result, buffer->cur - 1);
1128 break;
1130 case '/':
1131 /* A potential block or line comment. */
1132 comment_start = buffer->cur;
1133 c = *buffer->cur;
1135 if (c == '*')
1137 if (_cpp_skip_block_comment (pfile))
1138 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1140 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1141 || cpp_in_system_header (pfile)))
1143 /* Warn about comments only if pedantically GNUC89, and not
1144 in system headers. */
1145 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1146 && ! buffer->warned_cplusplus_comments)
1148 cpp_error (pfile, CPP_DL_PEDWARN,
1149 "C++ style comments are not allowed in ISO C90");
1150 cpp_error (pfile, CPP_DL_PEDWARN,
1151 "(this will be reported only once per input file)");
1152 buffer->warned_cplusplus_comments = 1;
1155 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1156 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1158 else if (c == '=')
1160 buffer->cur++;
1161 result->type = CPP_DIV_EQ;
1162 break;
1164 else
1166 result->type = CPP_DIV;
1167 break;
1170 if (!pfile->state.save_comments)
1172 result->flags |= PREV_WHITE;
1173 goto update_tokens_line;
1176 /* Save the comment as a token in its own right. */
1177 save_comment (pfile, result, comment_start, c);
1178 break;
1180 case '<':
1181 if (pfile->state.angled_headers)
1183 lex_string (pfile, result, buffer->cur - 1);
1184 break;
1187 result->type = CPP_LESS;
1188 if (*buffer->cur == '=')
1189 buffer->cur++, result->type = CPP_LESS_EQ;
1190 else if (*buffer->cur == '<')
1192 buffer->cur++;
1193 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1195 else if (CPP_OPTION (pfile, digraphs))
1197 if (*buffer->cur == ':')
1199 buffer->cur++;
1200 result->flags |= DIGRAPH;
1201 result->type = CPP_OPEN_SQUARE;
1203 else if (*buffer->cur == '%')
1205 buffer->cur++;
1206 result->flags |= DIGRAPH;
1207 result->type = CPP_OPEN_BRACE;
1210 break;
1212 case '>':
1213 result->type = CPP_GREATER;
1214 if (*buffer->cur == '=')
1215 buffer->cur++, result->type = CPP_GREATER_EQ;
1216 else if (*buffer->cur == '>')
1218 buffer->cur++;
1219 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1221 break;
1223 case '%':
1224 result->type = CPP_MOD;
1225 if (*buffer->cur == '=')
1226 buffer->cur++, result->type = CPP_MOD_EQ;
1227 else if (CPP_OPTION (pfile, digraphs))
1229 if (*buffer->cur == ':')
1231 buffer->cur++;
1232 result->flags |= DIGRAPH;
1233 result->type = CPP_HASH;
1234 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1235 buffer->cur += 2, result->type = CPP_PASTE;
1237 else if (*buffer->cur == '>')
1239 buffer->cur++;
1240 result->flags |= DIGRAPH;
1241 result->type = CPP_CLOSE_BRACE;
1244 break;
1246 case '.':
1247 result->type = CPP_DOT;
1248 if (ISDIGIT (*buffer->cur))
1250 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1251 result->type = CPP_NUMBER;
1252 lex_number (pfile, &result->val.str, &nst);
1253 warn_about_normalization (pfile, result, &nst);
1255 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1256 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1257 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1258 buffer->cur++, result->type = CPP_DOT_STAR;
1259 break;
1261 case '+':
1262 result->type = CPP_PLUS;
1263 if (*buffer->cur == '+')
1264 buffer->cur++, result->type = CPP_PLUS_PLUS;
1265 else if (*buffer->cur == '=')
1266 buffer->cur++, result->type = CPP_PLUS_EQ;
1267 break;
1269 case '-':
1270 result->type = CPP_MINUS;
1271 if (*buffer->cur == '>')
1273 buffer->cur++;
1274 result->type = CPP_DEREF;
1275 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1276 buffer->cur++, result->type = CPP_DEREF_STAR;
1278 else if (*buffer->cur == '-')
1279 buffer->cur++, result->type = CPP_MINUS_MINUS;
1280 else if (*buffer->cur == '=')
1281 buffer->cur++, result->type = CPP_MINUS_EQ;
1282 break;
1284 case '&':
1285 result->type = CPP_AND;
1286 if (*buffer->cur == '&')
1287 buffer->cur++, result->type = CPP_AND_AND;
1288 else if (*buffer->cur == '=')
1289 buffer->cur++, result->type = CPP_AND_EQ;
1290 break;
1292 case '|':
1293 result->type = CPP_OR;
1294 if (*buffer->cur == '|')
1295 buffer->cur++, result->type = CPP_OR_OR;
1296 else if (*buffer->cur == '=')
1297 buffer->cur++, result->type = CPP_OR_EQ;
1298 break;
1300 case ':':
1301 result->type = CPP_COLON;
1302 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1303 buffer->cur++, result->type = CPP_SCOPE;
1304 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1306 buffer->cur++;
1307 result->flags |= DIGRAPH;
1308 result->type = CPP_CLOSE_SQUARE;
1310 break;
1312 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1313 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1314 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1315 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1316 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1318 case '?': result->type = CPP_QUERY; break;
1319 case '~': result->type = CPP_COMPL; break;
1320 case ',': result->type = CPP_COMMA; break;
1321 case '(': result->type = CPP_OPEN_PAREN; break;
1322 case ')': result->type = CPP_CLOSE_PAREN; break;
1323 case '[': result->type = CPP_OPEN_SQUARE; break;
1324 case ']': result->type = CPP_CLOSE_SQUARE; break;
1325 case '{': result->type = CPP_OPEN_BRACE; break;
1326 case '}': result->type = CPP_CLOSE_BRACE; break;
1327 case ';': result->type = CPP_SEMICOLON; break;
1329 /* @ is a punctuator in Objective-C. */
1330 case '@': result->type = CPP_ATSIGN; break;
1332 case '$':
1333 case '\\':
1335 const uchar *base = --buffer->cur;
1336 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1338 if (forms_identifier_p (pfile, true, &nst))
1340 result->type = CPP_NAME;
1341 result->val.node = lex_identifier (pfile, base, true, &nst);
1342 warn_about_normalization (pfile, result, &nst);
1343 break;
1345 buffer->cur++;
1348 default:
1349 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1350 break;
1353 return result;
1356 /* An upper bound on the number of bytes needed to spell TOKEN.
1357 Does not include preceding whitespace. */
1358 unsigned int
1359 cpp_token_len (const cpp_token *token)
1361 unsigned int len;
1363 switch (TOKEN_SPELL (token))
1365 default: len = 4; break;
1366 case SPELL_LITERAL: len = token->val.str.len; break;
1367 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1370 return len;
/* Decode the UTF-8 sequence that starts at NAME and store it in BUFFER
   as a universal character name of the form \UXXXXXXXX: exactly ten
   bytes, lowercase hexadecimal, no terminating NUL.  Returns the
   number of bytes of NAME that made up the sequence.  Aborts if a
   continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned int lead = *src;
  unsigned long code_point;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The run of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Keep only the payload bits of the first byte.  */
  code_point = *src & (0x7F >> seq_len);

  /* Each continuation byte contributes six more bits.  */
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      src++;

      /* Ill-formed UTF-8.  */
      if ((*src & 0xC0) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*src & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1408 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1409 already contain the enough space to hold the token's spelling.
1410 Returns a pointer to the character after the last character written.
1411 FORSTRING is true if this is to be the spelling after translation
1412 phase 1 (this is different for UCNs).
1413 FIXME: Would be nice if we didn't need the PFILE argument. */
1414 unsigned char *
1415 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1416 unsigned char *buffer, bool forstring)
1418 switch (TOKEN_SPELL (token))
1420 case SPELL_OPERATOR:
1422 const unsigned char *spelling;
1423 unsigned char c;
1425 if (token->flags & DIGRAPH)
1426 spelling
1427 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1428 else if (token->flags & NAMED_OP)
1429 goto spell_ident;
1430 else
1431 spelling = TOKEN_NAME (token);
1433 while ((c = *spelling++) != '\0')
1434 *buffer++ = c;
1436 break;
1438 spell_ident:
1439 case SPELL_IDENT:
1440 if (forstring)
1442 memcpy (buffer, NODE_NAME (token->val.node),
1443 NODE_LEN (token->val.node));
1444 buffer += NODE_LEN (token->val.node);
1446 else
1448 size_t i;
1449 const unsigned char * name = NODE_NAME (token->val.node);
1451 for (i = 0; i < NODE_LEN (token->val.node); i++)
1452 if (name[i] & ~0x7F)
1454 i += utf8_to_ucn (buffer, name + i) - 1;
1455 buffer += 10;
1457 else
1458 *buffer++ = NODE_NAME (token->val.node)[i];
1460 break;
1462 case SPELL_LITERAL:
1463 memcpy (buffer, token->val.str.text, token->val.str.len);
1464 buffer += token->val.str.len;
1465 break;
1467 case SPELL_NONE:
1468 cpp_error (pfile, CPP_DL_ICE,
1469 "unspellable token %s", TOKEN_NAME (token));
1470 break;
1473 return buffer;
1476 /* Returns TOKEN spelt as a null-terminated string. The string is
1477 freed when the reader is destroyed. Useful for diagnostics. */
1478 unsigned char *
1479 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1481 unsigned int len = cpp_token_len (token) + 1;
1482 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1484 end = cpp_spell_token (pfile, token, start, false);
1485 end[0] = '\0';
1487 return start;
1490 /* Used by C front ends, which really should move to using
1491 cpp_token_as_text. */
1492 const char *
1493 cpp_type2name (enum cpp_ttype type)
1495 return (const char *) token_spellings[type].name;
1498 /* Writes the spelling of token to FP, without any preceding space.
1499 Separated from cpp_spell_token for efficiency - to avoid stdio
1500 double-buffering. */
1501 void
1502 cpp_output_token (const cpp_token *token, FILE *fp)
1504 switch (TOKEN_SPELL (token))
1506 case SPELL_OPERATOR:
1508 const unsigned char *spelling;
1509 int c;
1511 if (token->flags & DIGRAPH)
1512 spelling
1513 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1514 else if (token->flags & NAMED_OP)
1515 goto spell_ident;
1516 else
1517 spelling = TOKEN_NAME (token);
1519 c = *spelling;
1521 putc (c, fp);
1522 while ((c = *++spelling) != '\0');
1524 break;
1526 spell_ident:
1527 case SPELL_IDENT:
1529 size_t i;
1530 const unsigned char * name = NODE_NAME (token->val.node);
1532 for (i = 0; i < NODE_LEN (token->val.node); i++)
1533 if (name[i] & ~0x7F)
1535 unsigned char buffer[10];
1536 i += utf8_to_ucn (buffer, name + i) - 1;
1537 fwrite (buffer, 1, 10, fp);
1539 else
1540 fputc (NODE_NAME (token->val.node)[i], fp);
1542 break;
1544 case SPELL_LITERAL:
1545 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1546 break;
1548 case SPELL_NONE:
1549 /* An error, most probably. */
1550 break;
1554 /* Compare two tokens. */
1556 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1558 if (a->type == b->type && a->flags == b->flags)
1559 switch (TOKEN_SPELL (a))
1561 default: /* Keep compiler happy. */
1562 case SPELL_OPERATOR:
1563 return 1;
1564 case SPELL_NONE:
1565 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1566 case SPELL_IDENT:
1567 return a->val.node == b->val.node;
1568 case SPELL_LITERAL:
1569 return (a->val.str.len == b->val.str.len
1570 && !memcmp (a->val.str.text, b->val.str.text,
1571 a->val.str.len));
1574 return 0;
1577 /* Returns nonzero if a space should be inserted to avoid an
1578 accidental token paste for output. For simplicity, it is
1579 conservative, and occasionally advises a space where one is not
1580 needed, e.g. "." and ".2". */
1582 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1583 const cpp_token *token2)
1585 enum cpp_ttype a = token1->type, b = token2->type;
1586 cppchar_t c;
1588 if (token1->flags & NAMED_OP)
1589 a = CPP_NAME;
1590 if (token2->flags & NAMED_OP)
1591 b = CPP_NAME;
1593 c = EOF;
1594 if (token2->flags & DIGRAPH)
1595 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1596 else if (token_spellings[b].category == SPELL_OPERATOR)
1597 c = token_spellings[b].name[0];
1599 /* Quickly get everything that can paste with an '='. */
1600 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1601 return 1;
1603 switch (a)
1605 case CPP_GREATER: return c == '>';
1606 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1607 case CPP_PLUS: return c == '+';
1608 case CPP_MINUS: return c == '-' || c == '>';
1609 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1610 case CPP_MOD: return c == ':' || c == '>';
1611 case CPP_AND: return c == '&';
1612 case CPP_OR: return c == '|';
1613 case CPP_COLON: return c == ':' || c == '>';
1614 case CPP_DEREF: return c == '*';
1615 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1616 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1617 case CPP_NAME: return ((b == CPP_NUMBER
1618 && name_p (pfile, &token2->val.str))
1619 || b == CPP_NAME
1620 || b == CPP_CHAR || b == CPP_STRING); /* L */
1621 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1622 || c == '.' || c == '+' || c == '-');
1623 /* UCNs */
1624 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1625 && b == CPP_NAME)
1626 || (CPP_OPTION (pfile, objc)
1627 && token1->val.str.text[0] == '@'
1628 && (b == CPP_NAME || b == CPP_STRING)));
1629 default: break;
1632 return 0;
1635 /* Output all the remaining tokens on the current line, and a newline
1636 character, to FP. Leading whitespace is removed. If there are
1637 macros, special token padding is not performed. */
1638 void
1639 cpp_output_line (cpp_reader *pfile, FILE *fp)
1641 const cpp_token *token;
1643 token = cpp_get_token (pfile);
1644 while (token->type != CPP_EOF)
1646 cpp_output_token (token, fp);
1647 token = cpp_get_token (pfile);
1648 if (token->flags & PREV_WHITE)
1649 putc (' ', fp);
1652 putc ('\n', fp);
1655 /* Return a string representation of all the remaining tokens on the
1656 current line. The result is allocated using xmalloc and must be
1657 freed by the caller. */
1658 unsigned char *
1659 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1661 const cpp_token *token;
1662 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1663 unsigned int alloced = 120 + out;
1664 unsigned char *result = (unsigned char *) xmalloc (alloced);
1666 /* If DIR_NAME is empty, there are no initial contents. */
1667 if (dir_name)
1669 sprintf ((char *) result, "#%s ", dir_name);
1670 out += 2;
1673 token = cpp_get_token (pfile);
1674 while (token->type != CPP_EOF)
1676 unsigned char *last;
1677 /* Include room for a possible space and the terminating nul. */
1678 unsigned int len = cpp_token_len (token) + 2;
1680 if (out + len > alloced)
1682 alloced *= 2;
1683 if (out + len > alloced)
1684 alloced = out + len;
1685 result = (unsigned char *) xrealloc (result, alloced);
1688 last = cpp_spell_token (pfile, token, &result[out], 0);
1689 out = last - result;
1691 token = cpp_get_token (pfile);
1692 if (token->flags & PREV_WHITE)
1693 result[out++] = ' ';
1696 result[out] = '\0';
1697 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice BUFF's remaining room.  Both
   macro arguments are parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1715 /* Create a new allocation buffer. Place the control block at the end
1716 of the buffer, so that buffer overflows will cause immediate chaos. */
1717 static _cpp_buff *
1718 new_buff (size_t len)
1720 _cpp_buff *result;
1721 unsigned char *base;
1723 if (len < MIN_BUFF_SIZE)
1724 len = MIN_BUFF_SIZE;
1725 len = CPP_ALIGN (len);
1727 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1728 result = (_cpp_buff *) (base + len);
1729 result->base = base;
1730 result->cur = base;
1731 result->limit = base + len;
1732 result->next = NULL;
1733 return result;
1736 /* Place a chain of unwanted allocation buffers on the free list. */
1737 void
1738 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1740 _cpp_buff *end = buff;
1742 while (end->next)
1743 end = end->next;
1744 end->next = pfile->free_buffs;
1745 pfile->free_buffs = buff;
1748 /* Return a free buffer of size at least MIN_SIZE. */
1749 _cpp_buff *
1750 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1752 _cpp_buff *result, **p;
1754 for (p = &pfile->free_buffs;; p = &(*p)->next)
1756 size_t size;
1758 if (*p == NULL)
1759 return new_buff (min_size);
1760 result = *p;
1761 size = result->limit - result->base;
1762 /* Return a buffer that's big enough, but don't waste one that's
1763 way too big. */
1764 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1765 break;
1768 *p = result->next;
1769 result->next = NULL;
1770 result->cur = result->base;
1771 return result;
1774 /* Creates a new buffer with enough space to hold the uncommitted
1775 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1776 the excess bytes to the new buffer. Chains the new buffer after
1777 BUFF, and returns the new buffer. */
1778 _cpp_buff *
1779 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1781 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1782 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1784 buff->next = new_buff;
1785 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1786 return new_buff;
1789 /* Creates a new buffer with enough space to hold the uncommitted
1790 remaining bytes of the buffer pointed to by BUFF, and at least
1791 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1792 Chains the new buffer before the buffer pointed to by BUFF, and
1793 updates the pointer to point to the new buffer. */
1794 void
1795 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1797 _cpp_buff *new_buff, *old_buff = *pbuff;
1798 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1800 new_buff = _cpp_get_buff (pfile, size);
1801 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1802 new_buff->next = old_buff;
1803 *pbuff = new_buff;
1806 /* Free a chain of buffers starting at BUFF. */
1807 void
1808 _cpp_free_buff (_cpp_buff *buff)
1810 _cpp_buff *next;
1812 for (; buff; buff = next)
1814 next = buff->next;
1815 free (buff->base);
1819 /* Allocate permanent, unaligned storage of length LEN. */
1820 unsigned char *
1821 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1823 _cpp_buff *buff = pfile->u_buff;
1824 unsigned char *result = buff->cur;
1826 if (len > (size_t) (buff->limit - result))
1828 buff = _cpp_get_buff (pfile, len);
1829 buff->next = pfile->u_buff;
1830 pfile->u_buff = buff;
1831 result = buff->cur;
1834 buff->cur = result + len;
1835 return result;
1838 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1839 That buffer is used for growing allocations when saving macro
1840 replacement lists in a #define, and when parsing an answer to an
1841 assertion in #assert, #unassert or #if (and therefore possibly
1842 whilst expanding macros). It therefore must not be used by any
1843 code that they might call: specifically the lexer and the guts of
1844 the macro expander.
1846 All existing other uses clearly fit this restriction: storing
1847 registered pragmas during initialization. */
1848 unsigned char *
1849 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1851 _cpp_buff *buff = pfile->a_buff;
1852 unsigned char *result = buff->cur;
1854 if (len > (size_t) (buff->limit - result))
1856 buff = _cpp_get_buff (pfile, len);
1857 buff->next = pfile->a_buff;
1858 pfile->a_buff = buff;
1859 result = buff->cur;
1862 buff->cur = result + len;
1863 return result;
1866 /* Say which field of TOK is in use. */
1868 enum cpp_token_fld_kind
1869 cpp_token_val_index (cpp_token *tok)
1871 switch (TOKEN_SPELL (tok))
1873 case SPELL_IDENT:
1874 return CPP_TOKEN_FLD_NODE;
1875 case SPELL_LITERAL:
1876 return CPP_TOKEN_FLD_STR;
1877 case SPELL_NONE:
1878 if (tok->type == CPP_MACRO_ARG)
1879 return CPP_TOKEN_FLD_ARG_NO;
1880 else if (tok->type == CPP_PADDING)
1881 return CPP_TOKEN_FLD_SOURCE;
1882 else if (tok->type == CPP_PRAGMA)
1883 return CPP_TOKEN_FLD_PRAGMA;
1884 /* else fall through */
1885 default:
1886 return CPP_TOKEN_FLD_NONE;