2009-04-20 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / libcpp / lex.c
blob63e291c64c0ea0c8262f489fb92f4c88667cf66c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "internal.h"
/* Spelling categories; stored in the CATEGORY field of
   struct token_spelling.  SPELL_OPERATOR is explicitly zero and the
   remaining enumerators follow consecutively.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
36 struct token_spelling
38 enum spell_type category;
39 const unsigned char *name;
42 static const unsigned char *const digraph_spellings[] =
43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
45 #define OP(e, s) { SPELL_OPERATOR, UC s },
46 #define TK(e, s) { SPELL_ ## s, UC #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48 #undef OP
49 #undef TK
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void store_comment (cpp_reader *, cpp_token *);
60 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 unsigned int, enum cpp_ttype);
62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63 static int name_p (cpp_reader *, const cpp_string *);
64 static tokenrun *next_tokenrun (tokenrun *);
66 static _cpp_buff *new_buff (size_t);
69 /* Utility routine:
71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 int
74 cpp_ideq (const cpp_token *token, const char *string)
76 if (token->type != CPP_NAME)
77 return 0;
79 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
82 /* Record a note TYPE at byte POS into the current cleaned logical
83 line. */
84 static void
85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
87 if (buffer->notes_used == buffer->notes_cap)
89 buffer->notes_cap = buffer->notes_cap * 2 + 200;
90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91 buffer->notes_cap);
94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++;
99 /* Returns with a logical line that contains no escaped newlines or
100 trigraphs. This is a time-critical inner loop. */
101 void
102 _cpp_clean_line (cpp_reader *pfile)
104 cpp_buffer *buffer;
105 const uchar *s;
106 uchar c, *d, *p;
108 buffer = pfile->buffer;
109 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false;
112 s = buffer->next_line - 1;
114 if (!buffer->from_stage3)
116 const uchar *pbackslash = NULL;
118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
121 for (;;)
123 c = *++s;
124 if (__builtin_expect (c == '\n', false)
125 || __builtin_expect (c == '\r', false))
127 d = (uchar *) s;
129 if (__builtin_expect (s == buffer->rlimit, false))
130 goto done;
132 /* DOS line ending? */
133 if (__builtin_expect (c == '\r', false)
134 && s[1] == '\n')
136 s++;
137 if (s == buffer->rlimit)
138 goto done;
141 if (__builtin_expect (pbackslash == NULL, true))
142 goto done;
144 /* Check for escaped newline. */
145 p = d;
146 while (is_nvspace (p[-1]))
147 p--;
148 if (p - 1 != pbackslash)
149 goto done;
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
158 if (__builtin_expect (c == '\\', false))
159 pbackslash = s;
160 else if (__builtin_expect (c == '?', false)
161 && __builtin_expect (s[1] == '?', false)
162 && _cpp_trigraph_map[s[2]])
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer, s, s[2]);
167 if (CPP_OPTION (pfile, trigraphs))
169 /* We do, and that means we have to switch to the
170 slow path. */
171 d = (uchar *) s;
172 *d = _cpp_trigraph_map[s[2]];
173 s += 2;
174 break;
180 for (;;)
182 c = *++s;
183 *++d = c;
185 if (c == '\n' || c == '\r')
187 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++;
190 if (s == buffer->rlimit)
191 break;
193 /* Escaped? */
194 p = d;
195 while (p != buffer->next_line && is_nvspace (p[-1]))
196 p--;
197 if (p == buffer->next_line || p[-1] != '\\')
198 break;
200 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
201 d = p - 2;
202 buffer->next_line = p - 1;
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
207 add_line_note (buffer, d, s[2]);
208 if (CPP_OPTION (pfile, trigraphs))
210 *d = _cpp_trigraph_map[s[2]];
211 s += 2;
216 else
219 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s;
223 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++;
228 done:
229 *d = '\n';
230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer, d + 1, '\n');
232 buffer->next_line = s + 1;
235 /* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
237 static bool
238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
240 const uchar *p;
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
244 behavior. */
245 if (note->type != '/')
246 return false;
248 /* If -trigraphs, then this was an escaped newline iff the next note
249 is coincident. */
250 if (CPP_OPTION (pfile, trigraphs))
251 return note[1].pos == note->pos;
253 /* Otherwise, see if this forms an escaped newline. */
254 p = note->pos + 3;
255 while (is_nvspace (*p))
256 p++;
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p == '\n' && p < note[1].pos);
263 /* Process the notes created by add_line_note as far as the current
264 location. */
265 void
266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
268 cpp_buffer *buffer = pfile->buffer;
270 for (;;)
272 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273 unsigned int col;
275 if (note->pos > buffer->cur)
276 break;
278 buffer->cur_note++;
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
281 if (note->type == '\\' || note->type == ' ')
283 if (note->type == ' ' && !in_comment)
284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
285 "backslash and newline separated by space");
287 if (buffer->next_line > buffer->rlimit)
289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer->next_line = buffer->rlimit;
295 buffer->line_base = note->pos;
296 CPP_INCREMENT_LINE (pfile, 0);
298 else if (_cpp_trigraph_map[note->type])
300 if (CPP_OPTION (pfile, warn_trigraphs)
301 && (!in_comment || warn_in_comment (pfile, note)))
303 if (CPP_OPTION (pfile, trigraphs))
304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
305 "trigraph ??%c converted to %c",
306 note->type,
307 (int) _cpp_trigraph_map[note->type]);
308 else
310 cpp_error_with_line
311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
312 "trigraph ??%c ignored, use -trigraphs to enable",
313 note->type);
317 else
318 abort ();
322 /* Skip a C-style block comment. We find the end of the comment by
323 seeing if an asterisk is before every '/' we encounter. Returns
324 nonzero if comment terminated by EOF, zero otherwise.
326 Buffer->cur points to the initial asterisk of the comment. */
327 bool
328 _cpp_skip_block_comment (cpp_reader *pfile)
330 cpp_buffer *buffer = pfile->buffer;
331 const uchar *cur = buffer->cur;
332 uchar c;
334 cur++;
335 if (*cur == '/')
336 cur++;
338 for (;;)
340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
342 c = *cur++;
344 if (c == '/')
346 if (cur[-2] == '*')
347 break;
349 /* Warn about potential nested comments, but not if the '/'
350 comes immediately before the true comment delimiter.
351 Don't bother to get it right across escaped newlines. */
352 if (CPP_OPTION (pfile, warn_comments)
353 && cur[0] == '*' && cur[1] != '/')
355 buffer->cur = cur;
356 cpp_error_with_line (pfile, CPP_DL_WARNING,
357 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
358 "\"/*\" within comment");
361 else if (c == '\n')
363 unsigned int cols;
364 buffer->cur = cur - 1;
365 _cpp_process_line_notes (pfile, true);
366 if (buffer->next_line >= buffer->rlimit)
367 return true;
368 _cpp_clean_line (pfile);
370 cols = buffer->next_line - buffer->line_base;
371 CPP_INCREMENT_LINE (pfile, cols);
373 cur = buffer->cur;
377 buffer->cur = cur;
378 _cpp_process_line_notes (pfile, true);
379 return false;
382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
383 terminating newline. Handles escaped newlines. Returns nonzero
384 if a multiline comment. */
385 static int
386 skip_line_comment (cpp_reader *pfile)
388 cpp_buffer *buffer = pfile->buffer;
389 source_location orig_line = pfile->line_table->highest_line;
391 while (*buffer->cur != '\n')
392 buffer->cur++;
394 _cpp_process_line_notes (pfile, true);
395 return orig_line != pfile->line_table->highest_line;
398 /* Skips whitespace, saving the next non-whitespace character. */
399 static void
400 skip_whitespace (cpp_reader *pfile, cppchar_t c)
402 cpp_buffer *buffer = pfile->buffer;
403 bool saw_NUL = false;
407 /* Horizontal space always OK. */
408 if (c == ' ' || c == '\t')
410 /* Just \f \v or \0 left. */
411 else if (c == '\0')
412 saw_NUL = true;
413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
415 CPP_BUF_COL (buffer),
416 "%s in preprocessing directive",
417 c == '\f' ? "form feed" : "vertical tab");
419 c = *buffer->cur++;
421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
422 while (is_nvspace (c));
424 if (saw_NUL)
425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
427 buffer->cur--;
430 /* See if the characters of a number token are valid in a name (no
431 '.', '+' or '-'). */
432 static int
433 name_p (cpp_reader *pfile, const cpp_string *string)
435 unsigned int i;
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
441 return 1;
444 /* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
446 static void
447 warn_about_normalization (cpp_reader *pfile,
448 const cpp_token *token,
449 const struct normalize_state *s)
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
452 && !pfile->state.skipping)
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
457 size_t sz;
459 sz = cpp_spell_token (pfile, token, buf, false) - buf;
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
462 "`%.*s' is not in NFKC", (int) sz, buf);
463 else
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
465 "`%.*s' is not in NFC", (int) sz, buf);
469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
470 an identifier. FIRST is TRUE if this starts an identifier. */
471 static bool
472 forms_identifier_p (cpp_reader *pfile, int first,
473 struct normalize_state *state)
475 cpp_buffer *buffer = pfile->buffer;
477 if (*buffer->cur == '$')
479 if (!CPP_OPTION (pfile, dollars_in_ident))
480 return false;
482 buffer->cur++;
483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
485 CPP_OPTION (pfile, warn_dollars) = 0;
486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
489 return true;
492 /* Is this a syntactically valid UCN? */
493 if (CPP_OPTION (pfile, extended_identifiers)
494 && *buffer->cur == '\\'
495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
497 buffer->cur += 2;
498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
499 state))
500 return true;
501 buffer->cur -= 2;
504 return false;
507 /* Lex an identifier starting at BUFFER->CUR - 1. */
508 static cpp_hashnode *
509 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
510 struct normalize_state *nst)
512 cpp_hashnode *result;
513 const uchar *cur;
514 unsigned int len;
515 unsigned int hash = HT_HASHSTEP (0, *base);
517 cur = pfile->buffer->cur;
518 if (! starts_ucn)
519 while (ISIDNUM (*cur))
521 hash = HT_HASHSTEP (hash, *cur);
522 cur++;
524 pfile->buffer->cur = cur;
525 if (starts_ucn || forms_identifier_p (pfile, false, nst))
527 /* Slower version for identifiers containing UCNs (or $). */
528 do {
529 while (ISIDNUM (*pfile->buffer->cur))
531 pfile->buffer->cur++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst);
534 } while (forms_identifier_p (pfile, false, nst));
535 result = _cpp_interpret_identifier (pfile, base,
536 pfile->buffer->cur - base);
538 else
540 len = cur - base;
541 hash = HT_HASHFINISH (hash, len);
543 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
544 base, len, hash, HT_ALLOC));
547 /* Rarely, identifiers require diagnostics when lexed. */
548 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
549 && !pfile->state.skipping, 0))
551 /* It is allowed to poison the same identifier twice. */
552 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
553 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
554 NODE_NAME (result));
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result == pfile->spec_nodes.n__VA_ARGS__
559 && !pfile->state.va_args_ok)
560 cpp_error (pfile, CPP_DL_PEDWARN,
561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
565 return result;
568 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
569 static void
570 lex_number (cpp_reader *pfile, cpp_string *number,
571 struct normalize_state *nst)
573 const uchar *cur;
574 const uchar *base;
575 uchar *dest;
577 base = pfile->buffer->cur - 1;
580 cur = pfile->buffer->cur;
582 /* N.B. ISIDNUM does not include $. */
583 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
585 cur++;
586 NORMALIZE_STATE_UPDATE_IDNUM (nst);
589 pfile->buffer->cur = cur;
591 while (forms_identifier_p (pfile, false, nst));
593 number->len = cur - base;
594 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
595 memcpy (dest, base, number->len);
596 dest[number->len] = '\0';
597 number->text = dest;
600 /* Create a token of type TYPE with a literal spelling. */
601 static void
602 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
603 unsigned int len, enum cpp_ttype type)
605 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
607 memcpy (dest, base, len);
608 dest[len] = '\0';
609 token->type = type;
610 token->val.str.len = len;
611 token->val.str.text = dest;
614 /* Lexes a string, character constant, or angle-bracketed header file
615 name. The stored string contains the spelling, including opening
616 quote and leading any leading 'L', 'u' or 'U'. It returns the type
617 of the literal, or CPP_OTHER if it was not properly terminated, or
618 CPP_LESS for an unterminated header name which must be relexed as
619 normal tokens.
621 The spelling is NUL-terminated, but it is not guaranteed that this
622 is the first NUL since embedded NULs are preserved. */
623 static void
624 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
626 bool saw_NUL = false;
627 const uchar *cur;
628 cppchar_t terminator;
629 enum cpp_ttype type;
631 cur = base;
632 terminator = *cur++;
633 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
634 terminator = *cur++;
635 if (terminator == '\"')
636 type = (*base == 'L' ? CPP_WSTRING :
637 *base == 'U' ? CPP_STRING32 :
638 *base == 'u' ? CPP_STRING16 : CPP_STRING);
639 else if (terminator == '\'')
640 type = (*base == 'L' ? CPP_WCHAR :
641 *base == 'U' ? CPP_CHAR32 :
642 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
643 else
644 terminator = '>', type = CPP_HEADER_NAME;
646 for (;;)
648 cppchar_t c = *cur++;
650 /* In #include-style directives, terminators are not escapable. */
651 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
652 cur++;
653 else if (c == terminator)
654 break;
655 else if (c == '\n')
657 cur--;
658 /* Unmatched quotes always yield undefined behavior, but
659 greedy lexing means that what appears to be an unterminated
660 header name may actually be a legitimate sequence of tokens. */
661 if (terminator == '>')
663 token->type = CPP_LESS;
664 return;
666 type = CPP_OTHER;
667 break;
669 else if (c == '\0')
670 saw_NUL = true;
673 if (saw_NUL && !pfile->state.skipping)
674 cpp_error (pfile, CPP_DL_WARNING,
675 "null character(s) preserved in literal");
677 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
678 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
679 (int) terminator);
681 pfile->buffer->cur = cur;
682 create_literal (pfile, token, base, cur - base, type);
685 /* Return the comment table. The client may not make any assumption
686 about the ordering of the table. */
687 cpp_comment_table *
688 cpp_get_comments (cpp_reader *pfile)
690 return &pfile->comments;
693 /* Append a comment to the end of the comment table. */
694 static void
695 store_comment (cpp_reader *pfile, cpp_token *token)
697 int len;
699 if (pfile->comments.allocated == 0)
701 pfile->comments.allocated = 256;
702 pfile->comments.entries = (cpp_comment *) xmalloc
703 (pfile->comments.allocated * sizeof (cpp_comment));
706 if (pfile->comments.count == pfile->comments.allocated)
708 pfile->comments.allocated *= 2;
709 pfile->comments.entries = (cpp_comment *) xrealloc
710 (pfile->comments.entries,
711 pfile->comments.allocated * sizeof (cpp_comment));
714 len = token->val.str.len;
716 /* Copy comment. Note, token may not be NULL terminated. */
717 pfile->comments.entries[pfile->comments.count].comment =
718 (char *) xmalloc (sizeof (char) * (len + 1));
719 memcpy (pfile->comments.entries[pfile->comments.count].comment,
720 token->val.str.text, len);
721 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
723 /* Set source location. */
724 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
726 /* Increment the count of entries in the comment table. */
727 pfile->comments.count++;
730 /* The stored comment includes the comment start and any terminator. */
731 static void
732 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
733 cppchar_t type)
735 unsigned char *buffer;
736 unsigned int len, clen;
738 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
740 /* C++ comments probably (not definitely) have moved past a new
741 line, which we don't want to save in the comment. */
742 if (is_vspace (pfile->buffer->cur[-1]))
743 len--;
745 /* If we are currently in a directive, then we need to store all
746 C++ comments as C comments internally, and so we need to
747 allocate a little extra space in that case.
749 Note that the only time we encounter a directive here is
750 when we are saving comments in a "#define". */
751 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
753 buffer = _cpp_unaligned_alloc (pfile, clen);
755 token->type = CPP_COMMENT;
756 token->val.str.len = clen;
757 token->val.str.text = buffer;
759 buffer[0] = '/';
760 memcpy (buffer + 1, from, len - 1);
762 /* Finish conversion to a C comment, if necessary. */
763 if (pfile->state.in_directive && type == '/')
765 buffer[1] = '*';
766 buffer[clen - 2] = '*';
767 buffer[clen - 1] = '/';
770 /* Finally store this comment for use by clients of libcpp. */
771 store_comment (pfile, token);
774 /* Allocate COUNT tokens for RUN. */
775 void
776 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
778 run->base = XNEWVEC (cpp_token, count);
779 run->limit = run->base + count;
780 run->next = NULL;
783 /* Returns the next tokenrun, or creates one if there is none. */
784 static tokenrun *
785 next_tokenrun (tokenrun *run)
787 if (run->next == NULL)
789 run->next = XNEW (tokenrun);
790 run->next->prev = run;
791 _cpp_init_tokenrun (run->next, 250);
794 return run->next;
797 /* Look ahead in the input stream. */
798 const cpp_token *
799 cpp_peek_token (cpp_reader *pfile, int index)
801 cpp_context *context = pfile->context;
802 const cpp_token *peektok;
803 int count;
805 /* First, scan through any pending cpp_context objects. */
806 while (context->prev)
808 ptrdiff_t sz = (context->direct_p
809 ? LAST (context).token - FIRST (context).token
810 : LAST (context).ptoken - FIRST (context).ptoken);
812 if (index < (int) sz)
813 return (context->direct_p
814 ? FIRST (context).token + index
815 : *(FIRST (context).ptoken + index));
817 index -= (int) sz;
818 context = context->prev;
821 /* We will have to read some new tokens after all (and do so
822 without invalidating preceding tokens). */
823 count = index;
824 pfile->keep_tokens++;
828 peektok = _cpp_lex_token (pfile);
829 if (peektok->type == CPP_EOF)
830 return peektok;
832 while (index--);
834 _cpp_backup_tokens_direct (pfile, count + 1);
835 pfile->keep_tokens--;
837 return peektok;
840 /* Allocate a single token that is invalidated at the same time as the
841 rest of the tokens on the line. Has its line and col set to the
842 same as the last lexed token, so that diagnostics appear in the
843 right place. */
844 cpp_token *
845 _cpp_temp_token (cpp_reader *pfile)
847 cpp_token *old, *result;
848 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
849 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
851 old = pfile->cur_token - 1;
852 /* Any pre-existing lookaheads must not be clobbered. */
853 if (la)
855 if (sz <= la)
857 tokenrun *next = next_tokenrun (pfile->cur_run);
859 if (sz < la)
860 memmove (next->base + 1, next->base,
861 (la - sz) * sizeof (cpp_token));
863 next->base[0] = pfile->cur_run->limit[-1];
866 if (sz > 1)
867 memmove (pfile->cur_token + 1, pfile->cur_token,
868 MIN (la, sz - 1) * sizeof (cpp_token));
871 if (!sz && pfile->cur_token == pfile->cur_run->limit)
873 pfile->cur_run = next_tokenrun (pfile->cur_run);
874 pfile->cur_token = pfile->cur_run->base;
877 result = pfile->cur_token++;
878 result->src_loc = old->src_loc;
879 return result;
882 /* Lex a token into RESULT (external interface). Takes care of issues
883 like directive handling, token lookahead, multiple include
884 optimization and skipping. */
885 const cpp_token *
886 _cpp_lex_token (cpp_reader *pfile)
888 cpp_token *result;
890 for (;;)
892 if (pfile->cur_token == pfile->cur_run->limit)
894 pfile->cur_run = next_tokenrun (pfile->cur_run);
895 pfile->cur_token = pfile->cur_run->base;
897 /* We assume that the current token is somewhere in the current
898 run. */
899 if (pfile->cur_token < pfile->cur_run->base
900 || pfile->cur_token >= pfile->cur_run->limit)
901 abort ();
903 if (pfile->lookaheads)
905 pfile->lookaheads--;
906 result = pfile->cur_token++;
908 else
909 result = _cpp_lex_direct (pfile);
911 if (result->flags & BOL)
913 /* Is this a directive. If _cpp_handle_directive returns
914 false, it is an assembler #. */
915 if (result->type == CPP_HASH
916 /* 6.10.3 p 11: Directives in a list of macro arguments
917 gives undefined behavior. This implementation
918 handles the directive as normal. */
919 && pfile->state.parsing_args != 1)
921 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
923 if (pfile->directive_result.type == CPP_PADDING)
924 continue;
925 result = &pfile->directive_result;
928 else if (pfile->state.in_deferred_pragma)
929 result = &pfile->directive_result;
931 if (pfile->cb.line_change && !pfile->state.skipping)
932 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
935 /* We don't skip tokens in directives. */
936 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
937 break;
939 /* Outside a directive, invalidate controlling macros. At file
940 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
941 get here and MI optimization works. */
942 pfile->mi_valid = false;
944 if (!pfile->state.skipping || result->type == CPP_EOF)
945 break;
948 return result;
951 /* Returns true if a fresh line has been loaded. */
952 bool
953 _cpp_get_fresh_line (cpp_reader *pfile)
955 int return_at_eof;
957 /* We can't get a new line until we leave the current directive. */
958 if (pfile->state.in_directive)
959 return false;
961 for (;;)
963 cpp_buffer *buffer = pfile->buffer;
965 if (!buffer->need_line)
966 return true;
968 if (buffer->next_line < buffer->rlimit)
970 _cpp_clean_line (pfile);
971 return true;
974 /* First, get out of parsing arguments state. */
975 if (pfile->state.parsing_args)
976 return false;
978 /* End of buffer. Non-empty files should end in a newline. */
979 if (buffer->buf != buffer->rlimit
980 && buffer->next_line > buffer->rlimit
981 && !buffer->from_stage3)
983 /* Clip to buffer size. */
984 buffer->next_line = buffer->rlimit;
987 return_at_eof = buffer->return_at_eof;
988 _cpp_pop_buffer (pfile);
989 if (pfile->buffer == NULL || return_at_eof)
990 return false;
/* If the next input character is CHAR, consume it and give the token
   type THEN_TYPE; otherwise give it type ELSE_TYPE.  Uses the local
   variables `buffer' and `result' of the expansion site.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      if (*buffer->cur == CHAR)                         \
        {                                               \
          buffer->cur++;                                \
          result->type = THEN_TYPE;                     \
        }                                               \
      else                                              \
        result->type = ELSE_TYPE;                       \
    }                                                   \
  while (0)
1003 /* Lex a token into pfile->cur_token, which is also incremented, to
1004 get diagnostics pointing to the correct location.
1006 Does not handle issues such as token lookahead, multiple-include
1007 optimization, directives, skipping etc. This function is only
1008 suitable for use by _cpp_lex_token, and in special cases like
1009 lex_expansion_token which doesn't care for any of these issues.
1011 When meeting a newline, returns CPP_EOF if parsing a directive,
1012 otherwise returns to the start of the token buffer if permissible.
1013 Returns the location of the lexed token. */
1014 cpp_token *
1015 _cpp_lex_direct (cpp_reader *pfile)
1017 cppchar_t c;
1018 cpp_buffer *buffer;
1019 const unsigned char *comment_start;
1020 cpp_token *result = pfile->cur_token++;
1022 fresh_line:
1023 result->flags = 0;
1024 buffer = pfile->buffer;
1025 if (buffer->need_line)
1027 if (pfile->state.in_deferred_pragma)
1029 result->type = CPP_PRAGMA_EOL;
1030 pfile->state.in_deferred_pragma = false;
1031 if (!pfile->state.pragma_allow_expansion)
1032 pfile->state.prevent_expansion--;
1033 return result;
1035 if (!_cpp_get_fresh_line (pfile))
1037 result->type = CPP_EOF;
1038 if (!pfile->state.in_directive)
1040 /* Tell the compiler the line number of the EOF token. */
1041 result->src_loc = pfile->line_table->highest_line;
1042 result->flags = BOL;
1044 return result;
1046 if (!pfile->keep_tokens)
1048 pfile->cur_run = &pfile->base_run;
1049 result = pfile->base_run.base;
1050 pfile->cur_token = result + 1;
1052 result->flags = BOL;
1053 if (pfile->state.parsing_args == 2)
1054 result->flags |= PREV_WHITE;
1056 buffer = pfile->buffer;
1057 update_tokens_line:
1058 result->src_loc = pfile->line_table->highest_line;
1060 skipped_white:
1061 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1062 && !pfile->overlaid_buffer)
1064 _cpp_process_line_notes (pfile, false);
1065 result->src_loc = pfile->line_table->highest_line;
1067 c = *buffer->cur++;
1069 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1070 CPP_BUF_COLUMN (buffer, buffer->cur));
1072 switch (c)
1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 result->flags |= PREV_WHITE;
1076 skip_whitespace (pfile, c);
1077 goto skipped_white;
1079 case '\n':
1080 if (buffer->cur < buffer->rlimit)
1081 CPP_INCREMENT_LINE (pfile, 0);
1082 buffer->need_line = true;
1083 goto fresh_line;
1085 case '0': case '1': case '2': case '3': case '4':
1086 case '5': case '6': case '7': case '8': case '9':
1088 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1089 result->type = CPP_NUMBER;
1090 lex_number (pfile, &result->val.str, &nst);
1091 warn_about_normalization (pfile, result, &nst);
1092 break;
1095 case 'L':
1096 case 'u':
1097 case 'U':
1098 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1099 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1101 if (*buffer->cur == '\'' || *buffer->cur == '"')
1103 lex_string (pfile, result, buffer->cur - 1);
1104 break;
1107 /* Fall through. */
1109 case '_':
1110 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1111 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1112 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1113 case 's': case 't': case 'v': case 'w': case 'x':
1114 case 'y': case 'z':
1115 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1116 case 'G': case 'H': case 'I': case 'J': case 'K':
1117 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1118 case 'S': case 'T': case 'V': case 'W': case 'X':
1119 case 'Y': case 'Z':
1120 result->type = CPP_NAME;
1122 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1123 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1124 &nst);
1125 warn_about_normalization (pfile, result, &nst);
1128 /* Convert named operators to their proper types. */
1129 if (result->val.node->flags & NODE_OPERATOR)
1131 result->flags |= NAMED_OP;
1132 result->type = (enum cpp_ttype) result->val.node->directive_index;
1134 break;
1136 case '\'':
1137 case '"':
1138 lex_string (pfile, result, buffer->cur - 1);
1139 break;
1141 case '/':
1142 /* A potential block or line comment. */
1143 comment_start = buffer->cur;
1144 c = *buffer->cur;
1146 if (c == '*')
1148 if (_cpp_skip_block_comment (pfile))
1149 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1151 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1152 || cpp_in_system_header (pfile)))
1154 /* Warn about comments only if pedantically GNUC89, and not
1155 in system headers. */
1156 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1157 && ! buffer->warned_cplusplus_comments)
1159 cpp_error (pfile, CPP_DL_PEDWARN,
1160 "C++ style comments are not allowed in ISO C90");
1161 cpp_error (pfile, CPP_DL_PEDWARN,
1162 "(this will be reported only once per input file)");
1163 buffer->warned_cplusplus_comments = 1;
1166 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1167 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1169 else if (c == '=')
1171 buffer->cur++;
1172 result->type = CPP_DIV_EQ;
1173 break;
1175 else
1177 result->type = CPP_DIV;
1178 break;
1181 if (!pfile->state.save_comments)
1183 result->flags |= PREV_WHITE;
1184 goto update_tokens_line;
1187 /* Save the comment as a token in its own right. */
1188 save_comment (pfile, result, comment_start, c);
1189 break;
1191 case '<':
1192 if (pfile->state.angled_headers)
1194 lex_string (pfile, result, buffer->cur - 1);
1195 if (result->type != CPP_LESS)
1196 break;
1199 result->type = CPP_LESS;
1200 if (*buffer->cur == '=')
1201 buffer->cur++, result->type = CPP_LESS_EQ;
1202 else if (*buffer->cur == '<')
1204 buffer->cur++;
1205 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1207 else if (CPP_OPTION (pfile, digraphs))
1209 if (*buffer->cur == ':')
1211 buffer->cur++;
1212 result->flags |= DIGRAPH;
1213 result->type = CPP_OPEN_SQUARE;
1215 else if (*buffer->cur == '%')
1217 buffer->cur++;
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_OPEN_BRACE;
1222 break;
1224 case '>':
1225 result->type = CPP_GREATER;
1226 if (*buffer->cur == '=')
1227 buffer->cur++, result->type = CPP_GREATER_EQ;
1228 else if (*buffer->cur == '>')
1230 buffer->cur++;
1231 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1233 break;
1235 case '%':
1236 result->type = CPP_MOD;
1237 if (*buffer->cur == '=')
1238 buffer->cur++, result->type = CPP_MOD_EQ;
1239 else if (CPP_OPTION (pfile, digraphs))
1241 if (*buffer->cur == ':')
1243 buffer->cur++;
1244 result->flags |= DIGRAPH;
1245 result->type = CPP_HASH;
1246 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1247 buffer->cur += 2, result->type = CPP_PASTE, result->val.arg_no = 0;
1249 else if (*buffer->cur == '>')
1251 buffer->cur++;
1252 result->flags |= DIGRAPH;
1253 result->type = CPP_CLOSE_BRACE;
1256 break;
1258 case '.':
1259 result->type = CPP_DOT;
1260 if (ISDIGIT (*buffer->cur))
1262 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1263 result->type = CPP_NUMBER;
1264 lex_number (pfile, &result->val.str, &nst);
1265 warn_about_normalization (pfile, result, &nst);
1267 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1268 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1269 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1270 buffer->cur++, result->type = CPP_DOT_STAR;
1271 break;
1273 case '+':
1274 result->type = CPP_PLUS;
1275 if (*buffer->cur == '+')
1276 buffer->cur++, result->type = CPP_PLUS_PLUS;
1277 else if (*buffer->cur == '=')
1278 buffer->cur++, result->type = CPP_PLUS_EQ;
1279 break;
1281 case '-':
1282 result->type = CPP_MINUS;
1283 if (*buffer->cur == '>')
1285 buffer->cur++;
1286 result->type = CPP_DEREF;
1287 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1288 buffer->cur++, result->type = CPP_DEREF_STAR;
1290 else if (*buffer->cur == '-')
1291 buffer->cur++, result->type = CPP_MINUS_MINUS;
1292 else if (*buffer->cur == '=')
1293 buffer->cur++, result->type = CPP_MINUS_EQ;
1294 break;
1296 case '&':
1297 result->type = CPP_AND;
1298 if (*buffer->cur == '&')
1299 buffer->cur++, result->type = CPP_AND_AND;
1300 else if (*buffer->cur == '=')
1301 buffer->cur++, result->type = CPP_AND_EQ;
1302 break;
1304 case '|':
1305 result->type = CPP_OR;
1306 if (*buffer->cur == '|')
1307 buffer->cur++, result->type = CPP_OR_OR;
1308 else if (*buffer->cur == '=')
1309 buffer->cur++, result->type = CPP_OR_EQ;
1310 break;
1312 case ':':
1313 result->type = CPP_COLON;
1314 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1315 buffer->cur++, result->type = CPP_SCOPE;
1316 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1318 buffer->cur++;
1319 result->flags |= DIGRAPH;
1320 result->type = CPP_CLOSE_SQUARE;
1322 break;
1324 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1325 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1326 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1327 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1328 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.arg_no = 0; break;
1330 case '?': result->type = CPP_QUERY; break;
1331 case '~': result->type = CPP_COMPL; break;
1332 case ',': result->type = CPP_COMMA; break;
1333 case '(': result->type = CPP_OPEN_PAREN; break;
1334 case ')': result->type = CPP_CLOSE_PAREN; break;
1335 case '[': result->type = CPP_OPEN_SQUARE; break;
1336 case ']': result->type = CPP_CLOSE_SQUARE; break;
1337 case '{': result->type = CPP_OPEN_BRACE; break;
1338 case '}': result->type = CPP_CLOSE_BRACE; break;
1339 case ';': result->type = CPP_SEMICOLON; break;
1341 /* @ is a punctuator in Objective-C. */
1342 case '@': result->type = CPP_ATSIGN; break;
1344 case '$':
1345 case '\\':
1347 const uchar *base = --buffer->cur;
1348 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1350 if (forms_identifier_p (pfile, true, &nst))
1352 result->type = CPP_NAME;
1353 result->val.node = lex_identifier (pfile, base, true, &nst);
1354 warn_about_normalization (pfile, result, &nst);
1355 break;
1357 buffer->cur++;
1360 default:
1361 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1362 break;
1365 return result;
1368 /* An upper bound on the number of bytes needed to spell TOKEN.
1369 Does not include preceding whitespace. */
1370 unsigned int
1371 cpp_token_len (const cpp_token *token)
1373 unsigned int len;
1375 switch (TOKEN_SPELL (token))
1377 default: len = 6; break;
1378 case SPELL_LITERAL: len = token->val.str.len; break;
1379 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1382 return len;
/* Parse one UTF-8 sequence starting at NAME and place the
   corresponding \U escape in BUFFER.  Return the number of bytes read
   out of NAME.  (There are always 10 bytes written to BUFFER: a
   backslash, 'U' and eight lower-case hex digits.  No terminating NUL
   is written.)

   The sequence length is taken from the number of leading one bits
   in the first byte.  Aborts if any following byte is not of the
   form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The lead byte contributes whatever bits remain below its length
     marker; each continuation byte contributes six more.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1420 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1421 already contain the enough space to hold the token's spelling.
1422 Returns a pointer to the character after the last character written.
1423 FORSTRING is true if this is to be the spelling after translation
1424 phase 1 (this is different for UCNs).
1425 FIXME: Would be nice if we didn't need the PFILE argument. */
1426 unsigned char *
1427 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1428 unsigned char *buffer, bool forstring)
1430 switch (TOKEN_SPELL (token))
1432 case SPELL_OPERATOR:
1434 const unsigned char *spelling;
1435 unsigned char c;
1437 if (token->flags & DIGRAPH)
1438 spelling
1439 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1440 else if (token->flags & NAMED_OP)
1441 goto spell_ident;
1442 else
1443 spelling = TOKEN_NAME (token);
1445 while ((c = *spelling++) != '\0')
1446 *buffer++ = c;
1448 break;
1450 spell_ident:
1451 case SPELL_IDENT:
1452 if (forstring)
1454 memcpy (buffer, NODE_NAME (token->val.node),
1455 NODE_LEN (token->val.node));
1456 buffer += NODE_LEN (token->val.node);
1458 else
1460 size_t i;
1461 const unsigned char * name = NODE_NAME (token->val.node);
1463 for (i = 0; i < NODE_LEN (token->val.node); i++)
1464 if (name[i] & ~0x7F)
1466 i += utf8_to_ucn (buffer, name + i) - 1;
1467 buffer += 10;
1469 else
1470 *buffer++ = NODE_NAME (token->val.node)[i];
1472 break;
1474 case SPELL_LITERAL:
1475 memcpy (buffer, token->val.str.text, token->val.str.len);
1476 buffer += token->val.str.len;
1477 break;
1479 case SPELL_NONE:
1480 cpp_error (pfile, CPP_DL_ICE,
1481 "unspellable token %s", TOKEN_NAME (token));
1482 break;
1485 return buffer;
1488 /* Returns TOKEN spelt as a null-terminated string. The string is
1489 freed when the reader is destroyed. Useful for diagnostics. */
1490 unsigned char *
1491 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1493 unsigned int len = cpp_token_len (token) + 1;
1494 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1496 end = cpp_spell_token (pfile, token, start, false);
1497 end[0] = '\0';
1499 return start;
1502 /* Used by C front ends, which really should move to using
1503 cpp_token_as_text. */
1504 const char *
1505 cpp_type2name (enum cpp_ttype type)
1507 return (const char *) token_spellings[type].name;
1510 /* Writes the spelling of token to FP, without any preceding space.
1511 Separated from cpp_spell_token for efficiency - to avoid stdio
1512 double-buffering. */
1513 void
1514 cpp_output_token (const cpp_token *token, FILE *fp)
1516 switch (TOKEN_SPELL (token))
1518 case SPELL_OPERATOR:
1520 const unsigned char *spelling;
1521 int c;
1523 if (token->flags & DIGRAPH)
1524 spelling
1525 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1526 else if (token->flags & NAMED_OP)
1527 goto spell_ident;
1528 else
1529 spelling = TOKEN_NAME (token);
1531 c = *spelling;
1533 putc (c, fp);
1534 while ((c = *++spelling) != '\0');
1536 break;
1538 spell_ident:
1539 case SPELL_IDENT:
1541 size_t i;
1542 const unsigned char * name = NODE_NAME (token->val.node);
1544 for (i = 0; i < NODE_LEN (token->val.node); i++)
1545 if (name[i] & ~0x7F)
1547 unsigned char buffer[10];
1548 i += utf8_to_ucn (buffer, name + i) - 1;
1549 fwrite (buffer, 1, 10, fp);
1551 else
1552 fputc (NODE_NAME (token->val.node)[i], fp);
1554 break;
1556 case SPELL_LITERAL:
1557 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1558 break;
1560 case SPELL_NONE:
1561 /* An error, most probably. */
1562 break;
1566 /* Compare two tokens. */
1568 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1570 if (a->type == b->type && a->flags == b->flags)
1571 switch (TOKEN_SPELL (a))
1573 default: /* Keep compiler happy. */
1574 case SPELL_OPERATOR:
1575 /* arg_no is used to track where multiple consecutive ##
1576 tokens were originally located. */
1577 return (a->type != CPP_PASTE || a->val.arg_no == b->val.arg_no);
1578 case SPELL_NONE:
1579 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1580 case SPELL_IDENT:
1581 return a->val.node == b->val.node;
1582 case SPELL_LITERAL:
1583 return (a->val.str.len == b->val.str.len
1584 && !memcmp (a->val.str.text, b->val.str.text,
1585 a->val.str.len));
1588 return 0;
1591 /* Returns nonzero if a space should be inserted to avoid an
1592 accidental token paste for output. For simplicity, it is
1593 conservative, and occasionally advises a space where one is not
1594 needed, e.g. "." and ".2". */
1596 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1597 const cpp_token *token2)
1599 enum cpp_ttype a = token1->type, b = token2->type;
1600 cppchar_t c;
1602 if (token1->flags & NAMED_OP)
1603 a = CPP_NAME;
1604 if (token2->flags & NAMED_OP)
1605 b = CPP_NAME;
1607 c = EOF;
1608 if (token2->flags & DIGRAPH)
1609 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1610 else if (token_spellings[b].category == SPELL_OPERATOR)
1611 c = token_spellings[b].name[0];
1613 /* Quickly get everything that can paste with an '='. */
1614 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1615 return 1;
1617 switch (a)
1619 case CPP_GREATER: return c == '>';
1620 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1621 case CPP_PLUS: return c == '+';
1622 case CPP_MINUS: return c == '-' || c == '>';
1623 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1624 case CPP_MOD: return c == ':' || c == '>';
1625 case CPP_AND: return c == '&';
1626 case CPP_OR: return c == '|';
1627 case CPP_COLON: return c == ':' || c == '>';
1628 case CPP_DEREF: return c == '*';
1629 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1630 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1631 case CPP_NAME: return ((b == CPP_NUMBER
1632 && name_p (pfile, &token2->val.str))
1633 || b == CPP_NAME
1634 || b == CPP_CHAR || b == CPP_STRING); /* L */
1635 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1636 || c == '.' || c == '+' || c == '-');
1637 /* UCNs */
1638 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1639 && b == CPP_NAME)
1640 || (CPP_OPTION (pfile, objc)
1641 && token1->val.str.text[0] == '@'
1642 && (b == CPP_NAME || b == CPP_STRING)));
1643 default: break;
1646 return 0;
1649 /* Output all the remaining tokens on the current line, and a newline
1650 character, to FP. Leading whitespace is removed. If there are
1651 macros, special token padding is not performed. */
1652 void
1653 cpp_output_line (cpp_reader *pfile, FILE *fp)
1655 const cpp_token *token;
1657 token = cpp_get_token (pfile);
1658 while (token->type != CPP_EOF)
1660 cpp_output_token (token, fp);
1661 token = cpp_get_token (pfile);
1662 if (token->flags & PREV_WHITE)
1663 putc (' ', fp);
1666 putc ('\n', fp);
1669 /* Return a string representation of all the remaining tokens on the
1670 current line. The result is allocated using xmalloc and must be
1671 freed by the caller. */
1672 unsigned char *
1673 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1675 const cpp_token *token;
1676 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1677 unsigned int alloced = 120 + out;
1678 unsigned char *result = (unsigned char *) xmalloc (alloced);
1680 /* If DIR_NAME is empty, there are no initial contents. */
1681 if (dir_name)
1683 sprintf ((char *) result, "#%s ", dir_name);
1684 out += 2;
1687 token = cpp_get_token (pfile);
1688 while (token->type != CPP_EOF)
1690 unsigned char *last;
1691 /* Include room for a possible space and the terminating nul. */
1692 unsigned int len = cpp_token_len (token) + 2;
1694 if (out + len > alloced)
1696 alloced *= 2;
1697 if (out + len > alloced)
1698 alloced = out + len;
1699 result = (unsigned char *) xrealloc (result, alloced);
1702 last = cpp_spell_token (pfile, token, &result[out], 0);
1703 out = last - result;
1705 token = cpp_get_token (pfile);
1706 if (token->flags & PREV_WHITE)
1707 result[out++] = ' ';
1710 result[out] = '\0';
1711 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer size used.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is
   MIN_EXTRA plus twice the number of bytes between BUFF's cur and
   limit pointers.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1729 /* Create a new allocation buffer. Place the control block at the end
1730 of the buffer, so that buffer overflows will cause immediate chaos. */
1731 static _cpp_buff *
1732 new_buff (size_t len)
1734 _cpp_buff *result;
1735 unsigned char *base;
1737 if (len < MIN_BUFF_SIZE)
1738 len = MIN_BUFF_SIZE;
1739 len = CPP_ALIGN (len);
1741 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1742 result = (_cpp_buff *) (base + len);
1743 result->base = base;
1744 result->cur = base;
1745 result->limit = base + len;
1746 result->next = NULL;
1747 return result;
1750 /* Place a chain of unwanted allocation buffers on the free list. */
1751 void
1752 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1754 _cpp_buff *end = buff;
1756 while (end->next)
1757 end = end->next;
1758 end->next = pfile->free_buffs;
1759 pfile->free_buffs = buff;
1762 /* Return a free buffer of size at least MIN_SIZE. */
1763 _cpp_buff *
1764 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1766 _cpp_buff *result, **p;
1768 for (p = &pfile->free_buffs;; p = &(*p)->next)
1770 size_t size;
1772 if (*p == NULL)
1773 return new_buff (min_size);
1774 result = *p;
1775 size = result->limit - result->base;
1776 /* Return a buffer that's big enough, but don't waste one that's
1777 way too big. */
1778 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1779 break;
1782 *p = result->next;
1783 result->next = NULL;
1784 result->cur = result->base;
1785 return result;
1788 /* Creates a new buffer with enough space to hold the uncommitted
1789 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1790 the excess bytes to the new buffer. Chains the new buffer after
1791 BUFF, and returns the new buffer. */
1792 _cpp_buff *
1793 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1795 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1796 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1798 buff->next = new_buff;
1799 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1800 return new_buff;
1803 /* Creates a new buffer with enough space to hold the uncommitted
1804 remaining bytes of the buffer pointed to by BUFF, and at least
1805 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1806 Chains the new buffer before the buffer pointed to by BUFF, and
1807 updates the pointer to point to the new buffer. */
1808 void
1809 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1811 _cpp_buff *new_buff, *old_buff = *pbuff;
1812 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1814 new_buff = _cpp_get_buff (pfile, size);
1815 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1816 new_buff->next = old_buff;
1817 *pbuff = new_buff;
1820 /* Free a chain of buffers starting at BUFF. */
1821 void
1822 _cpp_free_buff (_cpp_buff *buff)
1824 _cpp_buff *next;
1826 for (; buff; buff = next)
1828 next = buff->next;
1829 free (buff->base);
1833 /* Allocate permanent, unaligned storage of length LEN. */
1834 unsigned char *
1835 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1837 _cpp_buff *buff = pfile->u_buff;
1838 unsigned char *result = buff->cur;
1840 if (len > (size_t) (buff->limit - result))
1842 buff = _cpp_get_buff (pfile, len);
1843 buff->next = pfile->u_buff;
1844 pfile->u_buff = buff;
1845 result = buff->cur;
1848 buff->cur = result + len;
1849 return result;
1852 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1853 That buffer is used for growing allocations when saving macro
1854 replacement lists in a #define, and when parsing an answer to an
1855 assertion in #assert, #unassert or #if (and therefore possibly
1856 whilst expanding macros). It therefore must not be used by any
1857 code that they might call: specifically the lexer and the guts of
1858 the macro expander.
1860 All existing other uses clearly fit this restriction: storing
1861 registered pragmas during initialization. */
1862 unsigned char *
1863 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1865 _cpp_buff *buff = pfile->a_buff;
1866 unsigned char *result = buff->cur;
1868 if (len > (size_t) (buff->limit - result))
1870 buff = _cpp_get_buff (pfile, len);
1871 buff->next = pfile->a_buff;
1872 pfile->a_buff = buff;
1873 result = buff->cur;
1876 buff->cur = result + len;
1877 return result;
1880 /* Say which field of TOK is in use. */
1882 enum cpp_token_fld_kind
1883 cpp_token_val_index (cpp_token *tok)
1885 switch (TOKEN_SPELL (tok))
1887 case SPELL_IDENT:
1888 return CPP_TOKEN_FLD_NODE;
1889 case SPELL_LITERAL:
1890 return CPP_TOKEN_FLD_STR;
1891 case SPELL_OPERATOR:
1892 if (tok->type == CPP_PASTE)
1893 return CPP_TOKEN_FLD_ARG_NO;
1894 else
1895 return CPP_TOKEN_FLD_NONE;
1896 case SPELL_NONE:
1897 if (tok->type == CPP_MACRO_ARG)
1898 return CPP_TOKEN_FLD_ARG_NO;
1899 else if (tok->type == CPP_PADDING)
1900 return CPP_TOKEN_FLD_SOURCE;
1901 else if (tok->type == CPP_PRAGMA)
1902 return CPP_TOKEN_FLD_PRAGMA;
1903 /* else fall through */
1904 default:
1905 return CPP_TOKEN_FLD_NONE;