Oops, missed deletion.
[official-gcc.git] / libcpp / lex.c
blobbab14a4baa3d761f34f512f0f677071f3abef372
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Per Bothner, 1994-95.
5 Based on CCCP program by Paul Rubin, June 1986
6 Adapted to ANSI C, Richard Stallman, Jan 1987
7 Broken out to separate file, Zack Weinberg, Mar 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 3, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "internal.h"
/* Spelling category stored in each token_spellings entry and read back
   through TOKEN_SPELL.  */
28 enum spell_type
30 SPELL_OPERATOR = 0, /* Category given to every OP table entry.  */
31 SPELL_IDENT,
32 SPELL_LITERAL,
33 SPELL_NONE
/* One entry of the token_spellings table, indexed by token type.  */
36 struct token_spelling
38 enum spell_type category; /* Read via TOKEN_SPELL.  */
39 const unsigned char *name; /* Read via TOKEN_NAME.  */
/* Spellings of the six digraph sequences.  */
42 static const unsigned char *const digraph_spellings[] =
43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
/* Expand TTYPE_TABLE into token_spellings.  An OP entry gets category
   SPELL_OPERATOR and the string S as its name; a TK entry gets category
   SPELL_<S> and the stringified enumerator E as its name.  */
45 #define OP(e, s) { SPELL_OPERATOR, UC s },
46 #define TK(e, s) { SPELL_ ## s, UC #e },
47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
48 #undef OP
49 #undef TK
/* Look up the spelling category / name for TOKEN by its type.  */
51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Prototypes for static helpers of this file.  */
54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
55 static int skip_line_comment (cpp_reader *);
56 static void skip_whitespace (cpp_reader *, cppchar_t);
57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
59 static void store_comment (cpp_reader *, cpp_token *);
60 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
61 unsigned int, enum cpp_ttype);
62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
63 static int name_p (cpp_reader *, const cpp_string *);
64 static tokenrun *next_tokenrun (tokenrun *);
66 static _cpp_buff *new_buff (size_t);
69 /* Utility routine:
71 Compares, the token TOKEN to the NUL-terminated string STRING.
72 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
73 int
74 cpp_ideq (const cpp_token *token, const char *string)
76 if (token->type != CPP_NAME)
77 return 0;
79 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
82 /* Record a note TYPE at byte POS into the current cleaned logical
83 line. */
84 static void
85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
87 if (buffer->notes_used == buffer->notes_cap)
89 buffer->notes_cap = buffer->notes_cap * 2 + 200;
90 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
91 buffer->notes_cap);
94 buffer->notes[buffer->notes_used].pos = pos;
95 buffer->notes[buffer->notes_used].type = type;
96 buffer->notes_used++;
99 /* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   Scanning starts at buffer->next_line with an empty note list.  A
   line note is recorded (add_line_note) for every escaped newline and
   every trigraph encountered; a trigraph is replaced in place only
   when the trigraphs option is set.  On exit the byte at the end of
   the cleaned line is '\n', a sentinel note follows it, and
   buffer->next_line is one past the last source byte consumed.
   Buffers flagged from_stage3 are only scanned for the line end.  */
101 void
102 _cpp_clean_line (cpp_reader *pfile)
104 cpp_buffer *buffer;
105 const uchar *s;
106 uchar c, *d, *p;
108 buffer = pfile->buffer;
/* Start a fresh note list and point cur/line_base at the new line.  */
109 buffer->cur_note = buffer->notes_used = 0;
110 buffer->cur = buffer->line_base = buffer->next_line;
111 buffer->need_line = false;
/* Both loops below pre-increment S before reading, so begin one byte
   early.  */
112 s = buffer->next_line - 1;
114 if (!buffer->from_stage3)
/* Position of the most recent backslash seen on the fast path; used to
   decide whether a newline is escaped.  */
116 const uchar *pbackslash = NULL;
118 /* Short circuit for the common case of an un-escaped line with
119 no trigraphs. The primary win here is by not writing any
120 data back to memory until we have to. */
121 for (;;)
123 c = *++s;
124 if (__builtin_expect (c == '\n', false)
125 || __builtin_expect (c == '\r', false))
/* D marks where the terminating '\n' will be stored.  */
127 d = (uchar *) s;
129 if (__builtin_expect (s == buffer->rlimit, false))
130 goto done;
132 /* DOS line ending? */
133 if (__builtin_expect (c == '\r', false)
134 && s[1] == '\n')
136 s++;
137 if (s == buffer->rlimit)
138 goto done;
/* No backslash on this line, so the newline cannot be escaped.  */
141 if (__builtin_expect (pbackslash == NULL, true))
142 goto done;
144 /* Check for escaped newline. */
145 p = d;
146 while (is_nvspace (p[-1]))
147 p--;
148 if (p - 1 != pbackslash)
149 goto done;
151 /* Have an escaped newline; process it and proceed to
152 the slow path. */
/* Note type ' ' records that whitespace separated the backslash from
   the newline; '\\' records a directly escaped newline.  */
153 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
154 d = p - 2;
155 buffer->next_line = p - 1;
156 break;
158 if (__builtin_expect (c == '\\', false))
159 pbackslash = s;
160 else if (__builtin_expect (c == '?', false)
161 && __builtin_expect (s[1] == '?', false)
162 && _cpp_trigraph_map[s[2]])
164 /* Have a trigraph. We may or may not have to convert
165 it. Add a line note regardless, for -Wtrigraphs. */
166 add_line_note (buffer, s, s[2]);
167 if (CPP_OPTION (pfile, trigraphs))
169 /* We do, and that means we have to switch to the
170 slow path. */
171 d = (uchar *) s;
172 *d = _cpp_trigraph_map[s[2]];
173 s += 2;
174 break;
/* Slow path: every byte read through S is stored through D, which
   trails S by the bytes removed so far.  */
180 for (;;)
182 c = *++s;
183 *++d = c;
185 if (c == '\n' || c == '\r')
187 /* Handle DOS line endings. */
188 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
189 s++;
190 if (s == buffer->rlimit)
191 break;
193 /* Escaped? */
194 p = d;
195 while (p != buffer->next_line && is_nvspace (p[-1]))
196 p--;
197 if (p == buffer->next_line || p[-1] != '\\')
198 break;
200 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
201 d = p - 2;
202 buffer->next_line = p - 1;
204 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
206 /* Add a note regardless, for the benefit of -Wtrigraphs. */
207 add_line_note (buffer, d, s[2]);
208 if (CPP_OPTION (pfile, trigraphs))
210 *d = _cpp_trigraph_map[s[2]];
211 s += 2;
/* from_stage3 buffer: no escaped-newline or trigraph handling, just
   locate the end of the line.  */
216 else
219 s++;
220 while (*s != '\n' && *s != '\r');
221 d = (uchar *) s;
223 /* Handle DOS line endings. */
224 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
225 s++;
228 done:
/* Terminate the cleaned line.  */
229 *d = '\n';
230 /* A sentinel note that should never be processed. */
231 add_line_note (buffer, d + 1, '\n');
232 buffer->next_line = s + 1;
235 /* Return true if the trigraph indicated by NOTE should be warned
236 about in a comment. */
237 static bool
238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
240 const uchar *p;
242 /* Within comments we don't warn about trigraphs, unless the
243 trigraph forms an escaped newline, as that may change
244 behavior. */
245 if (note->type != '/')
246 return false;
248 /* If -trigraphs, then this was an escaped newline iff the next note
249 is coincident. */
250 if (CPP_OPTION (pfile, trigraphs))
251 return note[1].pos == note->pos;
253 /* Otherwise, see if this forms an escaped newline. */
254 p = note->pos + 3;
255 while (is_nvspace (*p))
256 p++;
258 /* There might have been escaped newlines between the trigraph and the
259 newline we found. Hence the position test. */
260 return (*p == '\n' && p < note[1].pos);
263 /* Process the notes created by add_line_note as far as the current
   location, i.e. every note whose position is not beyond buffer->cur.
   IN_COMMENT is nonzero when the caller is inside a comment; it
   suppresses the backslash-space warning and restricts trigraph
   warnings to those accepted by warn_in_comment.  A note of any
   unexpected type aborts.  */
265 void
266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
268 cpp_buffer *buffer = pfile->buffer;
270 for (;;)
272 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
273 unsigned int col;
/* Stop at the first note that lies beyond the current position.  */
275 if (note->pos > buffer->cur)
276 break;
278 buffer->cur_note++;
279 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
/* Escaped newline: '\\' when the backslash directly preceded the
   newline, ' ' when whitespace came between them.  */
281 if (note->type == '\\' || note->type == ' ')
283 if (note->type == ' ' && !in_comment)
284 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
285 "backslash and newline separated by space");
287 if (buffer->next_line > buffer->rlimit)
289 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
290 "backslash-newline at end of file");
291 /* Prevent "no newline at end of file" warning. */
292 buffer->next_line = buffer->rlimit;
/* The logical line continues on a new physical line.  */
295 buffer->line_base = note->pos;
296 CPP_INCREMENT_LINE (pfile, 0);
/* Trigraph note: the note type is the trigraph's third character.  */
298 else if (_cpp_trigraph_map[note->type])
300 if (CPP_OPTION (pfile, warn_trigraphs)
301 && (!in_comment || warn_in_comment (pfile, note)))
303 if (CPP_OPTION (pfile, trigraphs))
304 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
305 "trigraph ??%c converted to %c",
306 note->type,
307 (int) _cpp_trigraph_map[note->type]);
308 else
310 cpp_error_with_line
311 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
312 "trigraph ??%c ignored, use -trigraphs to enable",
313 note->type);
317 else
318 abort ();
322 /* Skip a C-style block comment. We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter. Returns
   nonzero if comment terminated by EOF, zero otherwise.

   Buffer->cur points to the initial asterisk of the comment.  On a
   zero return buffer->cur is just past the closing '/', and the line
   notes up to that point have been processed with in_comment set.  */
327 bool
328 _cpp_skip_block_comment (cpp_reader *pfile)
330 cpp_buffer *buffer = pfile->buffer;
331 const uchar *cur = buffer->cur;
332 uchar c;
/* Step over the opening asterisk.  A slash right after it is skipped
   too, so the cur[-2] test below cannot pair it with that asterisk.  */
334 cur++;
335 if (*cur == '/')
336 cur++;
338 for (;;)
340 /* People like decorating comments with '*', so check for '/'
341 instead for efficiency. */
342 c = *cur++;
344 if (c == '/')
/* CUR is already past the slash, so cur[-2] is the byte before it.  */
346 if (cur[-2] == '*')
347 break;
349 /* Warn about potential nested comments, but not if the '/'
350 comes immediately before the true comment delimiter.
351 Don't bother to get it right across escaped newlines. */
352 if (CPP_OPTION (pfile, warn_comments)
353 && cur[0] == '*' && cur[1] != '/')
355 buffer->cur = cur;
356 cpp_error_with_line (pfile, CPP_DL_WARNING,
357 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
358 "\"/*\" within comment");
361 else if (c == '\n')
363 unsigned int cols;
/* End of the cleaned line: flush its notes, then fetch the next line,
   or report an unterminated comment when no input remains.  */
364 buffer->cur = cur - 1;
365 _cpp_process_line_notes (pfile, true);
366 if (buffer->next_line >= buffer->rlimit)
367 return true;
368 _cpp_clean_line (pfile);
370 cols = buffer->next_line - buffer->line_base;
371 CPP_INCREMENT_LINE (pfile, cols);
373 cur = buffer->cur;
377 buffer->cur = cur;
378 _cpp_process_line_notes (pfile, true);
379 return false;
382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
383 terminating newline. Handles escaped newlines. Returns nonzero
384 if a multiline comment. */
385 static int
386 skip_line_comment (cpp_reader *pfile)
388 cpp_buffer *buffer = pfile->buffer;
389 source_location orig_line = pfile->line_table->highest_line;
391 while (*buffer->cur != '\n')
392 buffer->cur++;
394 _cpp_process_line_notes (pfile, true);
395 return orig_line != pfile->line_table->highest_line;
398 /* Skips whitespace, saving the next non-whitespace character. */
399 static void
400 skip_whitespace (cpp_reader *pfile, cppchar_t c)
402 cpp_buffer *buffer = pfile->buffer;
403 bool saw_NUL = false;
407 /* Horizontal space always OK. */
408 if (c == ' ' || c == '\t')
410 /* Just \f \v or \0 left. */
411 else if (c == '\0')
412 saw_NUL = true;
413 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
414 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
415 CPP_BUF_COL (buffer),
416 "%s in preprocessing directive",
417 c == '\f' ? "form feed" : "vertical tab");
419 c = *buffer->cur++;
421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
422 while (is_nvspace (c));
424 if (saw_NUL)
425 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
427 buffer->cur--;
430 /* See if the characters of a number token are valid in a name (no
431 '.', '+' or '-'). */
432 static int
433 name_p (cpp_reader *pfile, const cpp_string *string)
435 unsigned int i;
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
441 return 1;
444 /* After parsing an identifier or other sequence, produce a warning about
445 sequences not in NFC/NFKC. */
446 static void
447 warn_about_normalization (cpp_reader *pfile,
448 const cpp_token *token,
449 const struct normalize_state *s)
451 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
452 && !pfile->state.skipping)
454 /* Make sure that the token is printed using UCNs, even
455 if we'd otherwise happily print UTF-8. */
456 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
457 size_t sz;
459 sz = cpp_spell_token (pfile, token, buf, false) - buf;
460 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
461 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
462 "`%.*s' is not in NFKC", (int) sz, buf);
463 else
464 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
465 "`%.*s' is not in NFC", (int) sz, buf);
469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
470 an identifier. FIRST is TRUE if this starts an identifier. */
471 static bool
472 forms_identifier_p (cpp_reader *pfile, int first,
473 struct normalize_state *state)
475 cpp_buffer *buffer = pfile->buffer;
477 if (*buffer->cur == '$')
479 if (!CPP_OPTION (pfile, dollars_in_ident))
480 return false;
482 buffer->cur++;
483 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
485 CPP_OPTION (pfile, warn_dollars) = 0;
486 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
489 return true;
492 /* Is this a syntactically valid UCN? */
493 if (CPP_OPTION (pfile, extended_identifiers)
494 && *buffer->cur == '\\'
495 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
497 buffer->cur += 2;
498 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
499 state))
500 return true;
501 buffer->cur -= 2;
504 return false;
507 /* Lex an identifier starting at BUFFER->CUR - 1.

   BASE points at the identifier's first character.  When STARTS_UCN is
   false, plain identifier characters are consumed and hashed on a fast
   path; the slower path is taken when STARTS_UCN is true or when
   forms_identifier_p accepts what follows.  NST is updated on the slow
   path.  Returns the hash node for the identifier, with
   pfile->buffer->cur left just past it.  Nodes flagged NODE_DIAGNOSTIC
   are diagnosed unless the reader is skipping.  */
508 static cpp_hashnode *
509 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
510 struct normalize_state *nst)
512 cpp_hashnode *result;
513 const uchar *cur;
514 unsigned int len;
/* The hash is seeded with the first character, which the caller has
   already consumed.  */
515 unsigned int hash = HT_HASHSTEP (0, *base);
517 cur = pfile->buffer->cur;
518 if (! starts_ucn)
519 while (ISIDNUM (*cur))
521 hash = HT_HASHSTEP (hash, *cur);
522 cur++;
524 pfile->buffer->cur = cur;
525 if (starts_ucn || forms_identifier_p (pfile, false, nst))
527 /* Slower version for identifiers containing UCNs (or $). */
528 do {
529 while (ISIDNUM (*pfile->buffer->cur))
531 pfile->buffer->cur++;
532 NORMALIZE_STATE_UPDATE_IDNUM (nst);
534 } while (forms_identifier_p (pfile, false, nst));
535 result = _cpp_interpret_identifier (pfile, base,
536 pfile->buffer->cur - base);
538 else
/* Fast path: look the spelling up directly with the hash computed
   above.  */
540 len = cur - base;
541 hash = HT_HASHFINISH (hash, len);
543 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
544 base, len, hash, HT_ALLOC));
547 /* Rarely, identifiers require diagnostics when lexed. */
548 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
549 && !pfile->state.skipping, 0))
551 /* It is allowed to poison the same identifier twice. */
552 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
553 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
554 NODE_NAME (result));
556 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
557 replacement list of a variadic macro. */
558 if (result == pfile->spec_nodes.n__VA_ARGS__
559 && !pfile->state.va_args_ok)
560 cpp_error (pfile, CPP_DL_PEDWARN,
561 "__VA_ARGS__ can only appear in the expansion"
562 " of a C99 variadic macro");
564 /* For -Wc++-compat, warn about use of C++ named operators. */
565 if (result->flags & NODE_WARN_OPERATOR)
566 cpp_error (pfile, CPP_DL_WARNING,
567 "identifier \"%s\" is a special operator name in C++",
568 NODE_NAME (result));
571 return result;
574 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
575 static void
576 lex_number (cpp_reader *pfile, cpp_string *number,
577 struct normalize_state *nst)
579 const uchar *cur;
580 const uchar *base;
581 uchar *dest;
583 base = pfile->buffer->cur - 1;
586 cur = pfile->buffer->cur;
588 /* N.B. ISIDNUM does not include $. */
589 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
591 cur++;
592 NORMALIZE_STATE_UPDATE_IDNUM (nst);
595 pfile->buffer->cur = cur;
597 while (forms_identifier_p (pfile, false, nst));
599 number->len = cur - base;
600 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
601 memcpy (dest, base, number->len);
602 dest[number->len] = '\0';
603 number->text = dest;
606 /* Create a token of type TYPE with a literal spelling. */
607 static void
608 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
609 unsigned int len, enum cpp_ttype type)
611 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
613 memcpy (dest, base, len);
614 dest[len] = '\0';
615 token->type = type;
616 token->val.str.len = len;
617 token->val.str.text = dest;
620 /* Lexes a string, character constant, or angle-bracketed header file
   name.  The stored string contains the spelling, including the
   opening quote and any leading 'L', 'u' or 'U'.  The token is given
   the type of the literal, or CPP_OTHER if it was not properly
   terminated.  For an unterminated header name the token type is set
   to CPP_LESS and the function returns without storing a spelling or
   moving buffer->cur, so that the text can be relexed as normal tokens.

   BASE points at the first character of the literal.

   The spelling is NUL-terminated, but it is not guaranteed that this
   is the first NUL since embedded NULs are preserved. */
629 static void
630 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
632 bool saw_NUL = false;
633 const uchar *cur;
634 cppchar_t terminator;
635 enum cpp_ttype type;
637 cur = base;
638 terminator = *cur++;
/* Step over an encoding prefix; the opening quote follows it.  */
639 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
640 terminator = *cur++;
641 if (terminator == '\"')
642 type = (*base == 'L' ? CPP_WSTRING :
643 *base == 'U' ? CPP_STRING32 :
644 *base == 'u' ? CPP_STRING16 : CPP_STRING);
645 else if (terminator == '\'')
646 type = (*base == 'L' ? CPP_WCHAR :
647 *base == 'U' ? CPP_CHAR32 :
648 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
649 else
/* Neither quote: treat as a header name, which ends at '>'.  */
650 terminator = '>', type = CPP_HEADER_NAME;
652 for (;;)
654 cppchar_t c = *cur++;
656 /* In #include-style directives, terminators are not escapable. */
657 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
658 cur++;
659 else if (c == terminator)
660 break;
661 else if (c == '\n')
/* Leave the newline itself unconsumed.  */
663 cur--;
664 /* Unmatched quotes always yield undefined behavior, but
665 greedy lexing means that what appears to be an unterminated
666 header name may actually be a legitimate sequence of tokens. */
667 if (terminator == '>')
669 token->type = CPP_LESS;
670 return;
672 type = CPP_OTHER;
673 break;
675 else if (c == '\0')
676 saw_NUL = true;
679 if (saw_NUL && !pfile->state.skipping)
680 cpp_error (pfile, CPP_DL_WARNING,
681 "null character(s) preserved in literal");
683 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
684 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
685 (int) terminator);
687 pfile->buffer->cur = cur;
688 create_literal (pfile, token, base, cur - base, type);
691 /* Return the comment table. The client may not make any assumption
692 about the ordering of the table. */
693 cpp_comment_table *
694 cpp_get_comments (cpp_reader *pfile)
696 return &pfile->comments;
699 /* Append a comment to the end of the comment table. */
700 static void
701 store_comment (cpp_reader *pfile, cpp_token *token)
703 int len;
705 if (pfile->comments.allocated == 0)
707 pfile->comments.allocated = 256;
708 pfile->comments.entries = (cpp_comment *) xmalloc
709 (pfile->comments.allocated * sizeof (cpp_comment));
712 if (pfile->comments.count == pfile->comments.allocated)
714 pfile->comments.allocated *= 2;
715 pfile->comments.entries = (cpp_comment *) xrealloc
716 (pfile->comments.entries,
717 pfile->comments.allocated * sizeof (cpp_comment));
720 len = token->val.str.len;
722 /* Copy comment. Note, token may not be NULL terminated. */
723 pfile->comments.entries[pfile->comments.count].comment =
724 (char *) xmalloc (sizeof (char) * (len + 1));
725 memcpy (pfile->comments.entries[pfile->comments.count].comment,
726 token->val.str.text, len);
727 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
729 /* Set source location. */
730 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
732 /* Increment the count of entries in the comment table. */
733 pfile->comments.count++;
736 /* The stored comment includes the comment start and any terminator. */
737 static void
738 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
739 cppchar_t type)
741 unsigned char *buffer;
742 unsigned int len, clen;
744 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
746 /* C++ comments probably (not definitely) have moved past a new
747 line, which we don't want to save in the comment. */
748 if (is_vspace (pfile->buffer->cur[-1]))
749 len--;
751 /* If we are currently in a directive, then we need to store all
752 C++ comments as C comments internally, and so we need to
753 allocate a little extra space in that case.
755 Note that the only time we encounter a directive here is
756 when we are saving comments in a "#define". */
757 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
759 buffer = _cpp_unaligned_alloc (pfile, clen);
761 token->type = CPP_COMMENT;
762 token->val.str.len = clen;
763 token->val.str.text = buffer;
765 buffer[0] = '/';
766 memcpy (buffer + 1, from, len - 1);
768 /* Finish conversion to a C comment, if necessary. */
769 if (pfile->state.in_directive && type == '/')
771 buffer[1] = '*';
772 buffer[clen - 2] = '*';
773 buffer[clen - 1] = '/';
776 /* Finally store this comment for use by clients of libcpp. */
777 store_comment (pfile, token);
780 /* Allocate COUNT tokens for RUN. */
781 void
782 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
784 run->base = XNEWVEC (cpp_token, count);
785 run->limit = run->base + count;
786 run->next = NULL;
789 /* Returns the next tokenrun, or creates one if there is none. */
790 static tokenrun *
791 next_tokenrun (tokenrun *run)
793 if (run->next == NULL)
795 run->next = XNEW (tokenrun);
796 run->next->prev = run;
797 _cpp_init_tokenrun (run->next, 250);
800 return run->next;
803 /* Look ahead in the input stream. */
804 const cpp_token *
805 cpp_peek_token (cpp_reader *pfile, int index)
807 cpp_context *context = pfile->context;
808 const cpp_token *peektok;
809 int count;
811 /* First, scan through any pending cpp_context objects. */
812 while (context->prev)
814 ptrdiff_t sz = (context->direct_p
815 ? LAST (context).token - FIRST (context).token
816 : LAST (context).ptoken - FIRST (context).ptoken);
818 if (index < (int) sz)
819 return (context->direct_p
820 ? FIRST (context).token + index
821 : *(FIRST (context).ptoken + index));
823 index -= (int) sz;
824 context = context->prev;
827 /* We will have to read some new tokens after all (and do so
828 without invalidating preceding tokens). */
829 count = index;
830 pfile->keep_tokens++;
834 peektok = _cpp_lex_token (pfile);
835 if (peektok->type == CPP_EOF)
836 return peektok;
838 while (index--);
840 _cpp_backup_tokens_direct (pfile, count + 1);
841 pfile->keep_tokens--;
843 return peektok;
846 /* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line. Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.

   The new token is placed at pfile->cur_token, which is then advanced.
   Lookahead tokens already stored from that position onwards are first
   shifted up by one slot so that they are not overwritten.  */
850 cpp_token *
851 _cpp_temp_token (cpp_reader *pfile)
853 cpp_token *old, *result;
/* SZ: slots left in the current run from cur_token on.
   LA: number of lookahead tokens stored there.  */
854 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
855 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
/* The token before cur_token supplies the source location.  */
857 old = pfile->cur_token - 1;
858 /* Any pre-existing lookaheads must not be clobbered. */
859 if (la)
/* The lookaheads reach the end of this run, so shifting them spills
   into the next run.  */
861 if (sz <= la)
863 tokenrun *next = next_tokenrun (pfile->cur_run);
/* Some lookaheads already live in the next run: move those up first.  */
865 if (sz < la)
866 memmove (next->base + 1, next->base,
867 (la - sz) * sizeof (cpp_token));
/* The last token of this run becomes the first of the next.  */
869 next->base[0] = pfile->cur_run->limit[-1];
/* Shift the lookaheads that stay within this run up by one slot.  */
872 if (sz > 1)
873 memmove (pfile->cur_token + 1, pfile->cur_token,
874 MIN (la, sz - 1) * sizeof (cpp_token));
/* No slot left in this run: continue at the start of the next one.  */
877 if (!sz && pfile->cur_token == pfile->cur_run->limit)
879 pfile->cur_run = next_tokenrun (pfile->cur_run);
880 pfile->cur_token = pfile->cur_run->base;
883 result = pfile->cur_token++;
884 result->src_loc = old->src_loc;
885 return result;
888 /* Lex a token into RESULT (external interface). Takes care of issues
   like directive handling, token lookahead, multiple include
   optimization and skipping.

   Loops until it has a token to hand back: a token is returned as soon
   as it is lexed inside a directive or deferred pragma; otherwise it
   is returned unless the reader is skipping, in which case only
   CPP_EOF is returned.  */
891 const cpp_token *
892 _cpp_lex_token (cpp_reader *pfile)
894 cpp_token *result;
896 for (;;)
/* Current run exhausted: continue in the next one.  */
898 if (pfile->cur_token == pfile->cur_run->limit)
900 pfile->cur_run = next_tokenrun (pfile->cur_run);
901 pfile->cur_token = pfile->cur_run->base;
903 /* We assume that the current token is somewhere in the current
904 run. */
905 if (pfile->cur_token < pfile->cur_run->base
906 || pfile->cur_token >= pfile->cur_run->limit)
907 abort ();
/* Replay a stored lookahead token if there is one; otherwise lex a
   new token.  */
909 if (pfile->lookaheads)
911 pfile->lookaheads--;
912 result = pfile->cur_token++;
914 else
915 result = _cpp_lex_direct (pfile);
/* Token flagged BOL: candidate for directive handling.  */
917 if (result->flags & BOL)
919 /* Is this a directive. If _cpp_handle_directive returns
920 false, it is an assembler #. */
921 if (result->type == CPP_HASH
922 /* 6.10.3 p 11: Directives in a list of macro arguments
923 gives undefined behavior. This implementation
924 handles the directive as normal. */
925 && pfile->state.parsing_args != 1)
927 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
/* A CPP_PADDING directive result means there is nothing to return;
   lex another token.  Any other result replaces the '#' token.  */
929 if (pfile->directive_result.type == CPP_PADDING)
930 continue;
931 result = &pfile->directive_result;
934 else if (pfile->state.in_deferred_pragma)
935 result = &pfile->directive_result;
/* Invoke the line_change callback, if set, unless skipping.  */
937 if (pfile->cb.line_change && !pfile->state.skipping)
938 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
941 /* We don't skip tokens in directives. */
942 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
943 break;
945 /* Outside a directive, invalidate controlling macros. At file
946 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
947 get here and MI optimization works. */
948 pfile->mi_valid = false;
/* While skipping, only CPP_EOF ends the loop.  */
950 if (!pfile->state.skipping || result->type == CPP_EOF)
951 break;
954 return result;
957 /* Returns true if a fresh line has been loaded. */
958 bool
959 _cpp_get_fresh_line (cpp_reader *pfile)
961 int return_at_eof;
963 /* We can't get a new line until we leave the current directive. */
964 if (pfile->state.in_directive)
965 return false;
967 for (;;)
969 cpp_buffer *buffer = pfile->buffer;
971 if (!buffer->need_line)
972 return true;
974 if (buffer->next_line < buffer->rlimit)
976 _cpp_clean_line (pfile);
977 return true;
980 /* First, get out of parsing arguments state. */
981 if (pfile->state.parsing_args)
982 return false;
984 /* End of buffer. Non-empty files should end in a newline. */
985 if (buffer->buf != buffer->rlimit
986 && buffer->next_line > buffer->rlimit
987 && !buffer->from_stage3)
989 /* Clip to buffer size. */
990 buffer->next_line = buffer->rlimit;
993 return_at_eof = buffer->return_at_eof;
994 _cpp_pop_buffer (pfile);
995 if (pfile->buffer == NULL || return_at_eof)
996 return false;
/* Set result->type to ELSE_TYPE; if the next input character is CHAR,
   consume it and set result->type to THEN_TYPE instead.  Uses the
   variables `result' and `buffer' of the function it is expanded in.  */
1000 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
1001 do \
1003 result->type = ELSE_TYPE; \
1004 if (*buffer->cur == CHAR) \
1005 buffer->cur++, result->type = THEN_TYPE; \
1007 while (0)
1009 /* Lex a token into pfile->cur_token, which is also incremented, to
1010 get diagnostics pointing to the correct location.
1012 Does not handle issues such as token lookahead, multiple-include
1013 optimization, directives, skipping etc. This function is only
1014 suitable for use by _cpp_lex_token, and in special cases like
1015 lex_expansion_token which doesn't care for any of these issues.
1017 When meeting a newline, returns CPP_EOF if parsing a directive,
1018 otherwise returns to the start of the token buffer if permissible.
1019 Returns the location of the lexed token. */
1020 cpp_token *
1021 _cpp_lex_direct (cpp_reader *pfile)
1023 cppchar_t c;
1024 cpp_buffer *buffer;
1025 const unsigned char *comment_start;
1026 cpp_token *result = pfile->cur_token++;
1028 fresh_line:
1029 result->flags = 0;
1030 buffer = pfile->buffer;
1031 if (buffer->need_line)
1033 if (pfile->state.in_deferred_pragma)
1035 result->type = CPP_PRAGMA_EOL;
1036 pfile->state.in_deferred_pragma = false;
1037 if (!pfile->state.pragma_allow_expansion)
1038 pfile->state.prevent_expansion--;
1039 return result;
1041 if (!_cpp_get_fresh_line (pfile))
1043 result->type = CPP_EOF;
1044 if (!pfile->state.in_directive)
1046 /* Tell the compiler the line number of the EOF token. */
1047 result->src_loc = pfile->line_table->highest_line;
1048 result->flags = BOL;
1050 return result;
1052 if (!pfile->keep_tokens)
1054 pfile->cur_run = &pfile->base_run;
1055 result = pfile->base_run.base;
1056 pfile->cur_token = result + 1;
1058 result->flags = BOL;
1059 if (pfile->state.parsing_args == 2)
1060 result->flags |= PREV_WHITE;
1062 buffer = pfile->buffer;
1063 update_tokens_line:
1064 result->src_loc = pfile->line_table->highest_line;
1066 skipped_white:
1067 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1068 && !pfile->overlaid_buffer)
1070 _cpp_process_line_notes (pfile, false);
1071 result->src_loc = pfile->line_table->highest_line;
1073 c = *buffer->cur++;
1075 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1076 CPP_BUF_COLUMN (buffer, buffer->cur));
1078 switch (c)
1080 case ' ': case '\t': case '\f': case '\v': case '\0':
1081 result->flags |= PREV_WHITE;
1082 skip_whitespace (pfile, c);
1083 goto skipped_white;
1085 case '\n':
1086 if (buffer->cur < buffer->rlimit)
1087 CPP_INCREMENT_LINE (pfile, 0);
1088 buffer->need_line = true;
1089 goto fresh_line;
1091 case '0': case '1': case '2': case '3': case '4':
1092 case '5': case '6': case '7': case '8': case '9':
1094 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1095 result->type = CPP_NUMBER;
1096 lex_number (pfile, &result->val.str, &nst);
1097 warn_about_normalization (pfile, result, &nst);
1098 break;
1101 case 'L':
1102 case 'u':
1103 case 'U':
1104 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1105 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1107 if (*buffer->cur == '\'' || *buffer->cur == '"')
1109 lex_string (pfile, result, buffer->cur - 1);
1110 break;
1113 /* Fall through. */
1115 case '_':
1116 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1117 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1118 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1119 case 's': case 't': case 'v': case 'w': case 'x':
1120 case 'y': case 'z':
1121 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1122 case 'G': case 'H': case 'I': case 'J': case 'K':
1123 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1124 case 'S': case 'T': case 'V': case 'W': case 'X':
1125 case 'Y': case 'Z':
1126 result->type = CPP_NAME;
1128 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1129 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1130 &nst);
1131 warn_about_normalization (pfile, result, &nst);
1134 /* Convert named operators to their proper types. */
1135 if (result->val.node.node->flags & NODE_OPERATOR)
1137 result->flags |= NAMED_OP;
1138 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1140 break;
1142 case '\'':
1143 case '"':
1144 lex_string (pfile, result, buffer->cur - 1);
1145 break;
1147 case '/':
1148 /* A potential block or line comment. */
1149 comment_start = buffer->cur;
1150 c = *buffer->cur;
1152 if (c == '*')
1154 if (_cpp_skip_block_comment (pfile))
1155 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1157 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1158 || cpp_in_system_header (pfile)))
1160 /* Warn about comments only if pedantically GNUC89, and not
1161 in system headers. */
1162 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1163 && ! buffer->warned_cplusplus_comments)
1165 cpp_error (pfile, CPP_DL_PEDWARN,
1166 "C++ style comments are not allowed in ISO C90");
1167 cpp_error (pfile, CPP_DL_PEDWARN,
1168 "(this will be reported only once per input file)");
1169 buffer->warned_cplusplus_comments = 1;
1172 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1173 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1175 else if (c == '=')
1177 buffer->cur++;
1178 result->type = CPP_DIV_EQ;
1179 break;
1181 else
1183 result->type = CPP_DIV;
1184 break;
1187 if (!pfile->state.save_comments)
1189 result->flags |= PREV_WHITE;
1190 goto update_tokens_line;
1193 /* Save the comment as a token in its own right. */
1194 save_comment (pfile, result, comment_start, c);
1195 break;
1197 case '<':
1198 if (pfile->state.angled_headers)
1200 lex_string (pfile, result, buffer->cur - 1);
1201 if (result->type != CPP_LESS)
1202 break;
1205 result->type = CPP_LESS;
1206 if (*buffer->cur == '=')
1207 buffer->cur++, result->type = CPP_LESS_EQ;
1208 else if (*buffer->cur == '<')
1210 buffer->cur++;
1211 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1213 else if (CPP_OPTION (pfile, digraphs))
1215 if (*buffer->cur == ':')
1217 buffer->cur++;
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_OPEN_SQUARE;
1221 else if (*buffer->cur == '%')
1223 buffer->cur++;
1224 result->flags |= DIGRAPH;
1225 result->type = CPP_OPEN_BRACE;
1228 break;
1230 case '>':
1231 result->type = CPP_GREATER;
1232 if (*buffer->cur == '=')
1233 buffer->cur++, result->type = CPP_GREATER_EQ;
1234 else if (*buffer->cur == '>')
1236 buffer->cur++;
1237 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1239 break;
1241 case '%':
1242 result->type = CPP_MOD;
1243 if (*buffer->cur == '=')
1244 buffer->cur++, result->type = CPP_MOD_EQ;
1245 else if (CPP_OPTION (pfile, digraphs))
1247 if (*buffer->cur == ':')
1249 buffer->cur++;
1250 result->flags |= DIGRAPH;
1251 result->type = CPP_HASH;
1252 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1253 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1255 else if (*buffer->cur == '>')
1257 buffer->cur++;
1258 result->flags |= DIGRAPH;
1259 result->type = CPP_CLOSE_BRACE;
1262 break;
1264 case '.':
1265 result->type = CPP_DOT;
1266 if (ISDIGIT (*buffer->cur))
1268 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1269 result->type = CPP_NUMBER;
1270 lex_number (pfile, &result->val.str, &nst);
1271 warn_about_normalization (pfile, result, &nst);
1273 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1274 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1275 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1276 buffer->cur++, result->type = CPP_DOT_STAR;
1277 break;
1279 case '+':
1280 result->type = CPP_PLUS;
1281 if (*buffer->cur == '+')
1282 buffer->cur++, result->type = CPP_PLUS_PLUS;
1283 else if (*buffer->cur == '=')
1284 buffer->cur++, result->type = CPP_PLUS_EQ;
1285 break;
1287 case '-':
1288 result->type = CPP_MINUS;
1289 if (*buffer->cur == '>')
1291 buffer->cur++;
1292 result->type = CPP_DEREF;
1293 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1294 buffer->cur++, result->type = CPP_DEREF_STAR;
1296 else if (*buffer->cur == '-')
1297 buffer->cur++, result->type = CPP_MINUS_MINUS;
1298 else if (*buffer->cur == '=')
1299 buffer->cur++, result->type = CPP_MINUS_EQ;
1300 break;
1302 case '&':
1303 result->type = CPP_AND;
1304 if (*buffer->cur == '&')
1305 buffer->cur++, result->type = CPP_AND_AND;
1306 else if (*buffer->cur == '=')
1307 buffer->cur++, result->type = CPP_AND_EQ;
1308 break;
1310 case '|':
1311 result->type = CPP_OR;
1312 if (*buffer->cur == '|')
1313 buffer->cur++, result->type = CPP_OR_OR;
1314 else if (*buffer->cur == '=')
1315 buffer->cur++, result->type = CPP_OR_EQ;
1316 break;
1318 case ':':
1319 result->type = CPP_COLON;
1320 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1321 buffer->cur++, result->type = CPP_SCOPE;
1322 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1324 buffer->cur++;
1325 result->flags |= DIGRAPH;
1326 result->type = CPP_CLOSE_SQUARE;
1328 break;
1330 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1331 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1332 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1333 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1334 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1336 case '?': result->type = CPP_QUERY; break;
1337 case '~': result->type = CPP_COMPL; break;
1338 case ',': result->type = CPP_COMMA; break;
1339 case '(': result->type = CPP_OPEN_PAREN; break;
1340 case ')': result->type = CPP_CLOSE_PAREN; break;
1341 case '[': result->type = CPP_OPEN_SQUARE; break;
1342 case ']': result->type = CPP_CLOSE_SQUARE; break;
1343 case '{': result->type = CPP_OPEN_BRACE; break;
1344 case '}': result->type = CPP_CLOSE_BRACE; break;
1345 case ';': result->type = CPP_SEMICOLON; break;
1347 /* @ is a punctuator in Objective-C. */
1348 case '@': result->type = CPP_ATSIGN; break;
1350 case '$':
1351 case '\\':
1353 const uchar *base = --buffer->cur;
1354 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1356 if (forms_identifier_p (pfile, true, &nst))
1358 result->type = CPP_NAME;
1359 result->val.node.node = lex_identifier (pfile, base, true, &nst);
1360 warn_about_normalization (pfile, result, &nst);
1361 break;
1363 buffer->cur++;
1366 default:
1367 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1368 break;
1371 return result;
1374 /* An upper bound on the number of bytes needed to spell TOKEN.
1375 Does not include preceding whitespace. */
1376 unsigned int
1377 cpp_token_len (const cpp_token *token)
1379 unsigned int len;
1381 switch (TOKEN_SPELL (token))
1383 default: len = 6; break;
1384 case SPELL_LITERAL: len = token->val.str.len; break;
1385 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
1388 return len;
/* Parse one UTF-8 sequence starting at NAME and place the matching
   \U escape in BUFFER.

   Exactly 10 bytes are always written to BUFFER: a backslash, 'U' and
   eight lowercase hexadecimal digits.  No NUL is appended.

   Returns the number of bytes read out of NAME.  This is always at
   least 1, so a caller that advances its index by the return value
   makes progress even when NAME points at a plain ASCII byte.

   Aborts if any continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it is the number of
     leading 1 bits in the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The payload bits of the lead byte are whatever follows the length
     marker.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];

  /* A byte without the high bit set has no leading 1 bits, yet one
     byte of NAME was still consumed.  */
  return ucn_len > 0 ? (size_t) ucn_len : 1;
}
1425 /* Given a token TYPE corresponding to a digraph, return a pointer to
1426 the spelling of the digraph. */
1427 static const unsigned char *
1428 cpp_digraph2name (enum cpp_ttype type)
1430 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1433 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1434 already contain the enough space to hold the token's spelling.
1435 Returns a pointer to the character after the last character written.
1436 FORSTRING is true if this is to be the spelling after translation
1437 phase 1 (this is different for UCNs).
1438 FIXME: Would be nice if we didn't need the PFILE argument. */
1439 unsigned char *
1440 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1441 unsigned char *buffer, bool forstring)
1443 switch (TOKEN_SPELL (token))
1445 case SPELL_OPERATOR:
1447 const unsigned char *spelling;
1448 unsigned char c;
1450 if (token->flags & DIGRAPH)
1451 spelling = cpp_digraph2name (token->type);
1452 else if (token->flags & NAMED_OP)
1453 goto spell_ident;
1454 else
1455 spelling = TOKEN_NAME (token);
1457 while ((c = *spelling++) != '\0')
1458 *buffer++ = c;
1460 break;
1462 spell_ident:
1463 case SPELL_IDENT:
1464 if (forstring)
1466 memcpy (buffer, NODE_NAME (token->val.node.node),
1467 NODE_LEN (token->val.node.node));
1468 buffer += NODE_LEN (token->val.node.node);
1470 else
1472 size_t i;
1473 const unsigned char * name = NODE_NAME (token->val.node.node);
1475 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1476 if (name[i] & ~0x7F)
1478 i += utf8_to_ucn (buffer, name + i) - 1;
1479 buffer += 10;
1481 else
1482 *buffer++ = NODE_NAME (token->val.node.node)[i];
1484 break;
1486 case SPELL_LITERAL:
1487 memcpy (buffer, token->val.str.text, token->val.str.len);
1488 buffer += token->val.str.len;
1489 break;
1491 case SPELL_NONE:
1492 cpp_error (pfile, CPP_DL_ICE,
1493 "unspellable token %s", TOKEN_NAME (token));
1494 break;
1497 return buffer;
1500 /* Returns TOKEN spelt as a null-terminated string. The string is
1501 freed when the reader is destroyed. Useful for diagnostics. */
1502 unsigned char *
1503 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1505 unsigned int len = cpp_token_len (token) + 1;
1506 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1508 end = cpp_spell_token (pfile, token, start, false);
1509 end[0] = '\0';
1511 return start;
1514 /* Returns a pointer to a string which spells the token defined by
1515 TYPE and FLAGS. Used by C front ends, which really should move to
1516 using cpp_token_as_text. */
1517 const char *
1518 cpp_type2name (enum cpp_ttype type, unsigned char flags)
1520 if (flags & DIGRAPH)
1521 return (const char *) cpp_digraph2name (type);
1522 else if (flags & NAMED_OP)
1523 return cpp_named_operator2name (type);
1525 return (const char *) token_spellings[type].name;
1528 /* Writes the spelling of token to FP, without any preceding space.
1529 Separated from cpp_spell_token for efficiency - to avoid stdio
1530 double-buffering. */
1531 void
1532 cpp_output_token (const cpp_token *token, FILE *fp)
1534 switch (TOKEN_SPELL (token))
1536 case SPELL_OPERATOR:
1538 const unsigned char *spelling;
1539 int c;
1541 if (token->flags & DIGRAPH)
1542 spelling = cpp_digraph2name (token->type);
1543 else if (token->flags & NAMED_OP)
1544 goto spell_ident;
1545 else
1546 spelling = TOKEN_NAME (token);
1548 c = *spelling;
1550 putc (c, fp);
1551 while ((c = *++spelling) != '\0');
1553 break;
1555 spell_ident:
1556 case SPELL_IDENT:
1558 size_t i;
1559 const unsigned char * name = NODE_NAME (token->val.node.node);
1561 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1562 if (name[i] & ~0x7F)
1564 unsigned char buffer[10];
1565 i += utf8_to_ucn (buffer, name + i) - 1;
1566 fwrite (buffer, 1, 10, fp);
1568 else
1569 fputc (NODE_NAME (token->val.node.node)[i], fp);
1571 break;
1573 case SPELL_LITERAL:
1574 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1575 break;
1577 case SPELL_NONE:
1578 /* An error, most probably. */
1579 break;
1583 /* Compare two tokens. */
1585 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1587 if (a->type == b->type && a->flags == b->flags)
1588 switch (TOKEN_SPELL (a))
1590 default: /* Keep compiler happy. */
1591 case SPELL_OPERATOR:
1592 /* token_no is used to track where multiple consecutive ##
1593 tokens were originally located. */
1594 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1595 case SPELL_NONE:
1596 return (a->type != CPP_MACRO_ARG
1597 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1598 case SPELL_IDENT:
1599 return a->val.node.node == b->val.node.node;
1600 case SPELL_LITERAL:
1601 return (a->val.str.len == b->val.str.len
1602 && !memcmp (a->val.str.text, b->val.str.text,
1603 a->val.str.len));
1606 return 0;
1609 /* Returns nonzero if a space should be inserted to avoid an
1610 accidental token paste for output. For simplicity, it is
1611 conservative, and occasionally advises a space where one is not
1612 needed, e.g. "." and ".2". */
1614 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1615 const cpp_token *token2)
1617 enum cpp_ttype a = token1->type, b = token2->type;
1618 cppchar_t c;
1620 if (token1->flags & NAMED_OP)
1621 a = CPP_NAME;
1622 if (token2->flags & NAMED_OP)
1623 b = CPP_NAME;
1625 c = EOF;
1626 if (token2->flags & DIGRAPH)
1627 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1628 else if (token_spellings[b].category == SPELL_OPERATOR)
1629 c = token_spellings[b].name[0];
1631 /* Quickly get everything that can paste with an '='. */
1632 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1633 return 1;
1635 switch (a)
1637 case CPP_GREATER: return c == '>';
1638 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1639 case CPP_PLUS: return c == '+';
1640 case CPP_MINUS: return c == '-' || c == '>';
1641 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1642 case CPP_MOD: return c == ':' || c == '>';
1643 case CPP_AND: return c == '&';
1644 case CPP_OR: return c == '|';
1645 case CPP_COLON: return c == ':' || c == '>';
1646 case CPP_DEREF: return c == '*';
1647 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1648 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1649 case CPP_NAME: return ((b == CPP_NUMBER
1650 && name_p (pfile, &token2->val.str))
1651 || b == CPP_NAME
1652 || b == CPP_CHAR || b == CPP_STRING); /* L */
1653 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1654 || c == '.' || c == '+' || c == '-');
1655 /* UCNs */
1656 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1657 && b == CPP_NAME)
1658 || (CPP_OPTION (pfile, objc)
1659 && token1->val.str.text[0] == '@'
1660 && (b == CPP_NAME || b == CPP_STRING)));
1661 default: break;
1664 return 0;
1667 /* Output all the remaining tokens on the current line, and a newline
1668 character, to FP. Leading whitespace is removed. If there are
1669 macros, special token padding is not performed. */
1670 void
1671 cpp_output_line (cpp_reader *pfile, FILE *fp)
1673 const cpp_token *token;
1675 token = cpp_get_token (pfile);
1676 while (token->type != CPP_EOF)
1678 cpp_output_token (token, fp);
1679 token = cpp_get_token (pfile);
1680 if (token->flags & PREV_WHITE)
1681 putc (' ', fp);
1684 putc ('\n', fp);
1687 /* Return a string representation of all the remaining tokens on the
1688 current line. The result is allocated using xmalloc and must be
1689 freed by the caller. */
1690 unsigned char *
1691 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1693 const cpp_token *token;
1694 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1695 unsigned int alloced = 120 + out;
1696 unsigned char *result = (unsigned char *) xmalloc (alloced);
1698 /* If DIR_NAME is empty, there are no initial contents. */
1699 if (dir_name)
1701 sprintf ((char *) result, "#%s ", dir_name);
1702 out += 2;
1705 token = cpp_get_token (pfile);
1706 while (token->type != CPP_EOF)
1708 unsigned char *last;
1709 /* Include room for a possible space and the terminating nul. */
1710 unsigned int len = cpp_token_len (token) + 2;
1712 if (out + len > alloced)
1714 alloced *= 2;
1715 if (out + len > alloced)
1716 alloced = out + len;
1717 result = (unsigned char *) xrealloc (result, alloced);
1720 last = cpp_spell_token (pfile, token, &result[out], 0);
1721 out = last - result;
1723 token = cpp_get_token (pfile);
1724 if (token->flags & PREV_WHITE)
1725 result[out++] = ' ';
1728 result[out] = '\0';
1729 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the bytes between BUFF's
   cur and limit.  Every macro argument is parenthesized so that
   arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1747 /* Create a new allocation buffer. Place the control block at the end
1748 of the buffer, so that buffer overflows will cause immediate chaos. */
1749 static _cpp_buff *
1750 new_buff (size_t len)
1752 _cpp_buff *result;
1753 unsigned char *base;
1755 if (len < MIN_BUFF_SIZE)
1756 len = MIN_BUFF_SIZE;
1757 len = CPP_ALIGN (len);
1759 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1760 result = (_cpp_buff *) (base + len);
1761 result->base = base;
1762 result->cur = base;
1763 result->limit = base + len;
1764 result->next = NULL;
1765 return result;
1768 /* Place a chain of unwanted allocation buffers on the free list. */
1769 void
1770 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1772 _cpp_buff *end = buff;
1774 while (end->next)
1775 end = end->next;
1776 end->next = pfile->free_buffs;
1777 pfile->free_buffs = buff;
1780 /* Return a free buffer of size at least MIN_SIZE. */
1781 _cpp_buff *
1782 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1784 _cpp_buff *result, **p;
1786 for (p = &pfile->free_buffs;; p = &(*p)->next)
1788 size_t size;
1790 if (*p == NULL)
1791 return new_buff (min_size);
1792 result = *p;
1793 size = result->limit - result->base;
1794 /* Return a buffer that's big enough, but don't waste one that's
1795 way too big. */
1796 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1797 break;
1800 *p = result->next;
1801 result->next = NULL;
1802 result->cur = result->base;
1803 return result;
1806 /* Creates a new buffer with enough space to hold the uncommitted
1807 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1808 the excess bytes to the new buffer. Chains the new buffer after
1809 BUFF, and returns the new buffer. */
1810 _cpp_buff *
1811 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1813 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1814 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1816 buff->next = new_buff;
1817 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1818 return new_buff;
1821 /* Creates a new buffer with enough space to hold the uncommitted
1822 remaining bytes of the buffer pointed to by BUFF, and at least
1823 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1824 Chains the new buffer before the buffer pointed to by BUFF, and
1825 updates the pointer to point to the new buffer. */
1826 void
1827 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1829 _cpp_buff *new_buff, *old_buff = *pbuff;
1830 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1832 new_buff = _cpp_get_buff (pfile, size);
1833 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1834 new_buff->next = old_buff;
1835 *pbuff = new_buff;
1838 /* Free a chain of buffers starting at BUFF. */
1839 void
1840 _cpp_free_buff (_cpp_buff *buff)
1842 _cpp_buff *next;
1844 for (; buff; buff = next)
1846 next = buff->next;
1847 free (buff->base);
1851 /* Allocate permanent, unaligned storage of length LEN. */
1852 unsigned char *
1853 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1855 _cpp_buff *buff = pfile->u_buff;
1856 unsigned char *result = buff->cur;
1858 if (len > (size_t) (buff->limit - result))
1860 buff = _cpp_get_buff (pfile, len);
1861 buff->next = pfile->u_buff;
1862 pfile->u_buff = buff;
1863 result = buff->cur;
1866 buff->cur = result + len;
1867 return result;
1870 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1871 That buffer is used for growing allocations when saving macro
1872 replacement lists in a #define, and when parsing an answer to an
1873 assertion in #assert, #unassert or #if (and therefore possibly
1874 whilst expanding macros). It therefore must not be used by any
1875 code that they might call: specifically the lexer and the guts of
1876 the macro expander.
1878 All existing other uses clearly fit this restriction: storing
1879 registered pragmas during initialization. */
1880 unsigned char *
1881 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1883 _cpp_buff *buff = pfile->a_buff;
1884 unsigned char *result = buff->cur;
1886 if (len > (size_t) (buff->limit - result))
1888 buff = _cpp_get_buff (pfile, len);
1889 buff->next = pfile->a_buff;
1890 pfile->a_buff = buff;
1891 result = buff->cur;
1894 buff->cur = result + len;
1895 return result;
1898 /* Say which field of TOK is in use. */
1900 enum cpp_token_fld_kind
1901 cpp_token_val_index (cpp_token *tok)
1903 switch (TOKEN_SPELL (tok))
1905 case SPELL_IDENT:
1906 return CPP_TOKEN_FLD_NODE;
1907 case SPELL_LITERAL:
1908 return CPP_TOKEN_FLD_STR;
1909 case SPELL_OPERATOR:
1910 if (tok->type == CPP_PASTE)
1911 return CPP_TOKEN_FLD_TOKEN_NO;
1912 else
1913 return CPP_TOKEN_FLD_NONE;
1914 case SPELL_NONE:
1915 if (tok->type == CPP_MACRO_ARG)
1916 return CPP_TOKEN_FLD_ARG_NO;
1917 else if (tok->type == CPP_PADDING)
1918 return CPP_TOKEN_FLD_SOURCE;
1919 else if (tok->type == CPP_PRAGMA)
1920 return CPP_TOKEN_FLD_PRAGMA;
1921 /* else fall through */
1922 default:
1923 return CPP_TOKEN_FLD_NONE;