libcpp/
[official-gcc.git] / libcpp / lex.c
blob452e8eafce59bd4ae182d3851631648d865bb6b5
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
27 enum spell_type
29 SPELL_OPERATOR = 0,
30 SPELL_IDENT,
31 SPELL_LITERAL,
32 SPELL_NONE
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void store_comment (cpp_reader *, cpp_token *);
59 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62 static int name_p (cpp_reader *, const cpp_string *);
63 static tokenrun *next_tokenrun (tokenrun *);
65 static _cpp_buff *new_buff (size_t);
68 /* Utility routine:
70 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
72 int
73 cpp_ideq (const cpp_token *token, const char *string)
75 if (token->type != CPP_NAME)
76 return 0;
78 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
81 /* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
83 static void
84 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
86 if (buffer->notes_used == buffer->notes_cap)
88 buffer->notes_cap = buffer->notes_cap * 2 + 200;
89 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
93 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
98 /* Returns with a logical line that contains no escaped newlines or
99 trigraphs. This is a time-critical inner loop. */
100 void
101 _cpp_clean_line (cpp_reader *pfile)
103 cpp_buffer *buffer;
104 const uchar *s;
105 uchar c, *d, *p;
107 buffer = pfile->buffer;
108 buffer->cur_note = buffer->notes_used = 0;
109 buffer->cur = buffer->line_base = buffer->next_line;
110 buffer->need_line = false;
111 s = buffer->next_line - 1;
113 if (!buffer->from_stage3)
115 const uchar *pbackslash = NULL;
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
122 c = *++s;
123 if (__builtin_expect (c == '\n', false)
124 || __builtin_expect (c == '\r', false))
126 d = (uchar *) s;
128 if (__builtin_expect (s == buffer->rlimit, false))
129 goto done;
131 /* DOS line ending? */
132 if (__builtin_expect (c == '\r', false)
133 && s[1] == '\n')
135 s++;
136 if (s == buffer->rlimit)
137 goto done;
140 if (__builtin_expect (pbackslash == NULL, true))
141 goto done;
143 /* Check for escaped newline. */
144 p = d;
145 while (is_nvspace (p[-1]))
146 p--;
147 if (p - 1 != pbackslash)
148 goto done;
150 /* Have an escaped newline; process it and proceed to
151 the slow path. */
152 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
153 d = p - 2;
154 buffer->next_line = p - 1;
155 break;
157 if (__builtin_expect (c == '\\', false))
158 pbackslash = s;
159 else if (__builtin_expect (c == '?', false)
160 && __builtin_expect (s[1] == '?', false)
161 && _cpp_trigraph_map[s[2]])
163 /* Have a trigraph. We may or may not have to convert
164 it. Add a line note regardless, for -Wtrigraphs. */
165 add_line_note (buffer, s, s[2]);
166 if (CPP_OPTION (pfile, trigraphs))
168 /* We do, and that means we have to switch to the
169 slow path. */
170 d = (uchar *) s;
171 *d = _cpp_trigraph_map[s[2]];
172 s += 2;
173 break;
179 for (;;)
181 c = *++s;
182 *++d = c;
184 if (c == '\n' || c == '\r')
186 /* Handle DOS line endings. */
187 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
188 s++;
189 if (s == buffer->rlimit)
190 break;
192 /* Escaped? */
193 p = d;
194 while (p != buffer->next_line && is_nvspace (p[-1]))
195 p--;
196 if (p == buffer->next_line || p[-1] != '\\')
197 break;
199 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
200 d = p - 2;
201 buffer->next_line = p - 1;
203 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
205 /* Add a note regardless, for the benefit of -Wtrigraphs. */
206 add_line_note (buffer, d, s[2]);
207 if (CPP_OPTION (pfile, trigraphs))
209 *d = _cpp_trigraph_map[s[2]];
210 s += 2;
215 else
218 s++;
219 while (*s != '\n' && *s != '\r');
220 d = (uchar *) s;
222 /* Handle DOS line endings. */
223 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
224 s++;
227 done:
228 *d = '\n';
229 /* A sentinel note that should never be processed. */
230 add_line_note (buffer, d + 1, '\n');
231 buffer->next_line = s + 1;
234 /* Return true if the trigraph indicated by NOTE should be warned
235 about in a comment. */
236 static bool
237 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
239 const uchar *p;
241 /* Within comments we don't warn about trigraphs, unless the
242 trigraph forms an escaped newline, as that may change
243 behavior. */
244 if (note->type != '/')
245 return false;
247 /* If -trigraphs, then this was an escaped newline iff the next note
248 is coincident. */
249 if (CPP_OPTION (pfile, trigraphs))
250 return note[1].pos == note->pos;
252 /* Otherwise, see if this forms an escaped newline. */
253 p = note->pos + 3;
254 while (is_nvspace (*p))
255 p++;
257 /* There might have been escaped newlines between the trigraph and the
258 newline we found. Hence the position test. */
259 return (*p == '\n' && p < note[1].pos);
262 /* Process the notes created by add_line_note as far as the current
263 location. */
264 void
265 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
267 cpp_buffer *buffer = pfile->buffer;
269 for (;;)
271 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
272 unsigned int col;
274 if (note->pos > buffer->cur)
275 break;
277 buffer->cur_note++;
278 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
280 if (note->type == '\\' || note->type == ' ')
282 if (note->type == ' ' && !in_comment)
283 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
284 "backslash and newline separated by space");
286 if (buffer->next_line > buffer->rlimit)
288 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
289 "backslash-newline at end of file");
290 /* Prevent "no newline at end of file" warning. */
291 buffer->next_line = buffer->rlimit;
294 buffer->line_base = note->pos;
295 CPP_INCREMENT_LINE (pfile, 0);
297 else if (_cpp_trigraph_map[note->type])
299 if (CPP_OPTION (pfile, warn_trigraphs)
300 && (!in_comment || warn_in_comment (pfile, note)))
302 if (CPP_OPTION (pfile, trigraphs))
303 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
304 "trigraph ??%c converted to %c",
305 note->type,
306 (int) _cpp_trigraph_map[note->type]);
307 else
309 cpp_error_with_line
310 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
311 "trigraph ??%c ignored, use -trigraphs to enable",
312 note->type);
316 else
317 abort ();
321 /* Skip a C-style block comment. We find the end of the comment by
322 seeing if an asterisk is before every '/' we encounter. Returns
323 nonzero if comment terminated by EOF, zero otherwise.
325 Buffer->cur points to the initial asterisk of the comment. */
326 bool
327 _cpp_skip_block_comment (cpp_reader *pfile)
329 cpp_buffer *buffer = pfile->buffer;
330 const uchar *cur = buffer->cur;
331 uchar c;
333 cur++;
334 if (*cur == '/')
335 cur++;
337 for (;;)
339 /* People like decorating comments with '*', so check for '/'
340 instead for efficiency. */
341 c = *cur++;
343 if (c == '/')
345 if (cur[-2] == '*')
346 break;
348 /* Warn about potential nested comments, but not if the '/'
349 comes immediately before the true comment delimiter.
350 Don't bother to get it right across escaped newlines. */
351 if (CPP_OPTION (pfile, warn_comments)
352 && cur[0] == '*' && cur[1] != '/')
354 buffer->cur = cur;
355 cpp_error_with_line (pfile, CPP_DL_WARNING,
356 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
357 "\"/*\" within comment");
360 else if (c == '\n')
362 unsigned int cols;
363 buffer->cur = cur - 1;
364 _cpp_process_line_notes (pfile, true);
365 if (buffer->next_line >= buffer->rlimit)
366 return true;
367 _cpp_clean_line (pfile);
369 cols = buffer->next_line - buffer->line_base;
370 CPP_INCREMENT_LINE (pfile, cols);
372 cur = buffer->cur;
376 buffer->cur = cur;
377 _cpp_process_line_notes (pfile, true);
378 return false;
381 /* Skip a C++ line comment, leaving buffer->cur pointing to the
382 terminating newline. Handles escaped newlines. Returns nonzero
383 if a multiline comment. */
384 static int
385 skip_line_comment (cpp_reader *pfile)
387 cpp_buffer *buffer = pfile->buffer;
388 source_location orig_line = pfile->line_table->highest_line;
390 while (*buffer->cur != '\n')
391 buffer->cur++;
393 _cpp_process_line_notes (pfile, true);
394 return orig_line != pfile->line_table->highest_line;
397 /* Skips whitespace, saving the next non-whitespace character. */
398 static void
399 skip_whitespace (cpp_reader *pfile, cppchar_t c)
401 cpp_buffer *buffer = pfile->buffer;
402 bool saw_NUL = false;
406 /* Horizontal space always OK. */
407 if (c == ' ' || c == '\t')
409 /* Just \f \v or \0 left. */
410 else if (c == '\0')
411 saw_NUL = true;
412 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
413 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
414 CPP_BUF_COL (buffer),
415 "%s in preprocessing directive",
416 c == '\f' ? "form feed" : "vertical tab");
418 c = *buffer->cur++;
420 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
421 while (is_nvspace (c));
423 if (saw_NUL)
424 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
426 buffer->cur--;
429 /* See if the characters of a number token are valid in a name (no
430 '.', '+' or '-'). */
431 static int
432 name_p (cpp_reader *pfile, const cpp_string *string)
434 unsigned int i;
436 for (i = 0; i < string->len; i++)
437 if (!is_idchar (string->text[i]))
438 return 0;
440 return 1;
443 /* After parsing an identifier or other sequence, produce a warning about
444 sequences not in NFC/NFKC. */
445 static void
446 warn_about_normalization (cpp_reader *pfile,
447 const cpp_token *token,
448 const struct normalize_state *s)
450 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
451 && !pfile->state.skipping)
453 /* Make sure that the token is printed using UCNs, even
454 if we'd otherwise happily print UTF-8. */
455 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
456 size_t sz;
458 sz = cpp_spell_token (pfile, token, buf, false) - buf;
459 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
460 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
461 "`%.*s' is not in NFKC", (int) sz, buf);
462 else
463 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
464 "`%.*s' is not in NFC", (int) sz, buf);
468 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
469 an identifier. FIRST is TRUE if this starts an identifier. */
470 static bool
471 forms_identifier_p (cpp_reader *pfile, int first,
472 struct normalize_state *state)
474 cpp_buffer *buffer = pfile->buffer;
476 if (*buffer->cur == '$')
478 if (!CPP_OPTION (pfile, dollars_in_ident))
479 return false;
481 buffer->cur++;
482 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
484 CPP_OPTION (pfile, warn_dollars) = 0;
485 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
488 return true;
491 /* Is this a syntactically valid UCN? */
492 if (CPP_OPTION (pfile, extended_identifiers)
493 && *buffer->cur == '\\'
494 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
496 buffer->cur += 2;
497 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
498 state))
499 return true;
500 buffer->cur -= 2;
503 return false;
506 /* Lex an identifier starting at BUFFER->CUR - 1. */
507 static cpp_hashnode *
508 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
509 struct normalize_state *nst)
511 cpp_hashnode *result;
512 const uchar *cur;
513 unsigned int len;
514 unsigned int hash = HT_HASHSTEP (0, *base);
516 cur = pfile->buffer->cur;
517 if (! starts_ucn)
518 while (ISIDNUM (*cur))
520 hash = HT_HASHSTEP (hash, *cur);
521 cur++;
523 pfile->buffer->cur = cur;
524 if (starts_ucn || forms_identifier_p (pfile, false, nst))
526 /* Slower version for identifiers containing UCNs (or $). */
527 do {
528 while (ISIDNUM (*pfile->buffer->cur))
530 pfile->buffer->cur++;
531 NORMALIZE_STATE_UPDATE_IDNUM (nst);
533 } while (forms_identifier_p (pfile, false, nst));
534 result = _cpp_interpret_identifier (pfile, base,
535 pfile->buffer->cur - base);
537 else
539 len = cur - base;
540 hash = HT_HASHFINISH (hash, len);
542 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
543 base, len, hash, HT_ALLOC));
546 /* Rarely, identifiers require diagnostics when lexed. */
547 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
548 && !pfile->state.skipping, 0))
550 /* It is allowed to poison the same identifier twice. */
551 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
552 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
553 NODE_NAME (result));
555 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
556 replacement list of a variadic macro. */
557 if (result == pfile->spec_nodes.n__VA_ARGS__
558 && !pfile->state.va_args_ok)
559 cpp_error (pfile, CPP_DL_PEDWARN,
560 "__VA_ARGS__ can only appear in the expansion"
561 " of a C99 variadic macro");
564 return result;
567 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
568 static void
569 lex_number (cpp_reader *pfile, cpp_string *number,
570 struct normalize_state *nst)
572 const uchar *cur;
573 const uchar *base;
574 uchar *dest;
576 base = pfile->buffer->cur - 1;
579 cur = pfile->buffer->cur;
581 /* N.B. ISIDNUM does not include $. */
582 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
584 cur++;
585 NORMALIZE_STATE_UPDATE_IDNUM (nst);
588 pfile->buffer->cur = cur;
590 while (forms_identifier_p (pfile, false, nst));
592 number->len = cur - base;
593 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
594 memcpy (dest, base, number->len);
595 dest[number->len] = '\0';
596 number->text = dest;
599 /* Create a token of type TYPE with a literal spelling. */
600 static void
601 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
602 unsigned int len, enum cpp_ttype type)
604 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
606 memcpy (dest, base, len);
607 dest[len] = '\0';
608 token->type = type;
609 token->val.str.len = len;
610 token->val.str.text = dest;
613 /* Lexes a string, character constant, or angle-bracketed header file
614 name. The stored string contains the spelling, including opening
615 quote and leading any leading 'L', 'u' or 'U'. It returns the type
616 of the literal, or CPP_OTHER if it was not properly terminated, or
617 CPP_LESS for an unterminated header name which must be relexed as
618 normal tokens.
620 The spelling is NUL-terminated, but it is not guaranteed that this
621 is the first NUL since embedded NULs are preserved. */
622 static void
623 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
625 bool saw_NUL = false;
626 const uchar *cur;
627 cppchar_t terminator;
628 enum cpp_ttype type;
630 cur = base;
631 terminator = *cur++;
632 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
633 terminator = *cur++;
634 if (terminator == '\"')
635 type = (*base == 'L' ? CPP_WSTRING :
636 *base == 'U' ? CPP_STRING32 :
637 *base == 'u' ? CPP_STRING16 : CPP_STRING);
638 else if (terminator == '\'')
639 type = (*base == 'L' ? CPP_WCHAR :
640 *base == 'U' ? CPP_CHAR32 :
641 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
642 else
643 terminator = '>', type = CPP_HEADER_NAME;
645 for (;;)
647 cppchar_t c = *cur++;
649 /* In #include-style directives, terminators are not escapable. */
650 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
651 cur++;
652 else if (c == terminator)
653 break;
654 else if (c == '\n')
656 cur--;
657 /* Unmatched quotes always yield undefined behavior, but
658 greedy lexing means that what appears to be an unterminated
659 header name may actually be a legitimate sequence of tokens. */
660 if (terminator == '>')
662 token->type = CPP_LESS;
663 return;
665 type = CPP_OTHER;
666 break;
668 else if (c == '\0')
669 saw_NUL = true;
672 if (saw_NUL && !pfile->state.skipping)
673 cpp_error (pfile, CPP_DL_WARNING,
674 "null character(s) preserved in literal");
676 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
677 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
678 (int) terminator);
680 pfile->buffer->cur = cur;
681 create_literal (pfile, token, base, cur - base, type);
684 /* Return the comment table. The client may not make any assumption
685 about the ordering of the table. */
686 cpp_comment_table *
687 cpp_get_comments (cpp_reader *pfile)
689 return &pfile->comments;
692 /* Append a comment to the end of the comment table. */
693 static void
694 store_comment (cpp_reader *pfile, cpp_token *token)
696 int len;
698 if (pfile->comments.allocated == 0)
700 pfile->comments.allocated = 256;
701 pfile->comments.entries = (cpp_comment *) xmalloc
702 (pfile->comments.allocated * sizeof (cpp_comment));
705 if (pfile->comments.count == pfile->comments.allocated)
707 pfile->comments.allocated *= 2;
708 pfile->comments.entries = (cpp_comment *) xrealloc
709 (pfile->comments.entries,
710 pfile->comments.allocated * sizeof (cpp_comment));
713 len = token->val.str.len;
715 /* Copy comment. Note, token may not be NULL terminated. */
716 pfile->comments.entries[pfile->comments.count].comment =
717 (char *) xmalloc (sizeof (char) * (len + 1));
718 memcpy (pfile->comments.entries[pfile->comments.count].comment,
719 token->val.str.text, len);
720 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
722 /* Set source location. */
723 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
725 /* Increment the count of entries in the comment table. */
726 pfile->comments.count++;
729 /* The stored comment includes the comment start and any terminator. */
730 static void
731 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
732 cppchar_t type)
734 unsigned char *buffer;
735 unsigned int len, clen;
737 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
739 /* C++ comments probably (not definitely) have moved past a new
740 line, which we don't want to save in the comment. */
741 if (is_vspace (pfile->buffer->cur[-1]))
742 len--;
744 /* If we are currently in a directive, then we need to store all
745 C++ comments as C comments internally, and so we need to
746 allocate a little extra space in that case.
748 Note that the only time we encounter a directive here is
749 when we are saving comments in a "#define". */
750 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
752 buffer = _cpp_unaligned_alloc (pfile, clen);
754 token->type = CPP_COMMENT;
755 token->val.str.len = clen;
756 token->val.str.text = buffer;
758 buffer[0] = '/';
759 memcpy (buffer + 1, from, len - 1);
761 /* Finish conversion to a C comment, if necessary. */
762 if (pfile->state.in_directive && type == '/')
764 buffer[1] = '*';
765 buffer[clen - 2] = '*';
766 buffer[clen - 1] = '/';
769 /* Finally store this comment for use by clients of libcpp. */
770 store_comment (pfile, token);
773 /* Allocate COUNT tokens for RUN. */
774 void
775 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
777 run->base = XNEWVEC (cpp_token, count);
778 run->limit = run->base + count;
779 run->next = NULL;
782 /* Returns the next tokenrun, or creates one if there is none. */
783 static tokenrun *
784 next_tokenrun (tokenrun *run)
786 if (run->next == NULL)
788 run->next = XNEW (tokenrun);
789 run->next->prev = run;
790 _cpp_init_tokenrun (run->next, 250);
793 return run->next;
796 /* Look ahead in the input stream. */
797 const cpp_token *
798 cpp_peek_token (cpp_reader *pfile, int index)
800 cpp_context *context = pfile->context;
801 const cpp_token *peektok;
802 int count;
804 /* First, scan through any pending cpp_context objects. */
805 while (context->prev)
807 ptrdiff_t sz = (context->direct_p
808 ? LAST (context).token - FIRST (context).token
809 : LAST (context).ptoken - FIRST (context).ptoken);
811 if (index < (int) sz)
812 return (context->direct_p
813 ? FIRST (context).token + index
814 : *(FIRST (context).ptoken + index));
816 index -= (int) sz;
817 context = context->prev;
820 /* We will have to read some new tokens after all (and do so
821 without invalidating preceding tokens). */
822 count = index;
823 pfile->keep_tokens++;
827 peektok = _cpp_lex_token (pfile);
828 if (peektok->type == CPP_EOF)
829 return peektok;
831 while (index--);
833 _cpp_backup_tokens_direct (pfile, count + 1);
834 pfile->keep_tokens--;
836 return peektok;
839 /* Allocate a single token that is invalidated at the same time as the
840 rest of the tokens on the line. Has its line and col set to the
841 same as the last lexed token, so that diagnostics appear in the
842 right place. */
843 cpp_token *
844 _cpp_temp_token (cpp_reader *pfile)
846 cpp_token *old, *result;
847 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
848 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
850 old = pfile->cur_token - 1;
851 /* Any pre-existing lookaheads must not be clobbered. */
852 if (la)
854 if (sz <= la)
856 tokenrun *next = next_tokenrun (pfile->cur_run);
858 if (sz < la)
859 memmove (next->base + 1, next->base,
860 (la - sz) * sizeof (cpp_token));
862 next->base[0] = pfile->cur_run->limit[-1];
865 if (sz > 1)
866 memmove (pfile->cur_token + 1, pfile->cur_token,
867 MIN (la, sz - 1) * sizeof (cpp_token));
870 if (!sz && pfile->cur_token == pfile->cur_run->limit)
872 pfile->cur_run = next_tokenrun (pfile->cur_run);
873 pfile->cur_token = pfile->cur_run->base;
876 result = pfile->cur_token++;
877 result->src_loc = old->src_loc;
878 return result;
881 /* Lex a token into RESULT (external interface). Takes care of issues
882 like directive handling, token lookahead, multiple include
883 optimization and skipping. */
884 const cpp_token *
885 _cpp_lex_token (cpp_reader *pfile)
887 cpp_token *result;
889 for (;;)
891 if (pfile->cur_token == pfile->cur_run->limit)
893 pfile->cur_run = next_tokenrun (pfile->cur_run);
894 pfile->cur_token = pfile->cur_run->base;
896 /* We assume that the current token is somewhere in the current
897 run. */
898 if (pfile->cur_token < pfile->cur_run->base
899 || pfile->cur_token >= pfile->cur_run->limit)
900 abort ();
902 if (pfile->lookaheads)
904 pfile->lookaheads--;
905 result = pfile->cur_token++;
907 else
908 result = _cpp_lex_direct (pfile);
910 if (result->flags & BOL)
912 /* Is this a directive. If _cpp_handle_directive returns
913 false, it is an assembler #. */
914 if (result->type == CPP_HASH
915 /* 6.10.3 p 11: Directives in a list of macro arguments
916 gives undefined behavior. This implementation
917 handles the directive as normal. */
918 && pfile->state.parsing_args != 1)
920 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
922 if (pfile->directive_result.type == CPP_PADDING)
923 continue;
924 result = &pfile->directive_result;
927 else if (pfile->state.in_deferred_pragma)
928 result = &pfile->directive_result;
930 if (pfile->cb.line_change && !pfile->state.skipping)
931 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
934 /* We don't skip tokens in directives. */
935 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
936 break;
938 /* Outside a directive, invalidate controlling macros. At file
939 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
940 get here and MI optimization works. */
941 pfile->mi_valid = false;
943 if (!pfile->state.skipping || result->type == CPP_EOF)
944 break;
947 return result;
950 /* Returns true if a fresh line has been loaded. */
951 bool
952 _cpp_get_fresh_line (cpp_reader *pfile)
954 int return_at_eof;
956 /* We can't get a new line until we leave the current directive. */
957 if (pfile->state.in_directive)
958 return false;
960 for (;;)
962 cpp_buffer *buffer = pfile->buffer;
964 if (!buffer->need_line)
965 return true;
967 if (buffer->next_line < buffer->rlimit)
969 _cpp_clean_line (pfile);
970 return true;
973 /* First, get out of parsing arguments state. */
974 if (pfile->state.parsing_args)
975 return false;
977 /* End of buffer. Non-empty files should end in a newline. */
978 if (buffer->buf != buffer->rlimit
979 && buffer->next_line > buffer->rlimit
980 && !buffer->from_stage3)
982 /* Clip to buffer size. */
983 buffer->next_line = buffer->rlimit;
986 return_at_eof = buffer->return_at_eof;
987 _cpp_pop_buffer (pfile);
988 if (pfile->buffer == NULL || return_at_eof)
989 return false;
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE and
   consume nothing.  Relies on variables named `result' and `buffer'
   being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
1002 /* Lex a token into pfile->cur_token, which is also incremented, to
1003 get diagnostics pointing to the correct location.
1005 Does not handle issues such as token lookahead, multiple-include
1006 optimization, directives, skipping etc. This function is only
1007 suitable for use by _cpp_lex_token, and in special cases like
1008 lex_expansion_token which doesn't care for any of these issues.
1010 When meeting a newline, returns CPP_EOF if parsing a directive,
1011 otherwise returns to the start of the token buffer if permissible.
1012 Returns the location of the lexed token. */
1013 cpp_token *
1014 _cpp_lex_direct (cpp_reader *pfile)
1016 cppchar_t c;
1017 cpp_buffer *buffer;
1018 const unsigned char *comment_start;
1019 cpp_token *result = pfile->cur_token++;
1021 fresh_line:
1022 result->flags = 0;
1023 buffer = pfile->buffer;
1024 if (buffer->need_line)
1026 if (pfile->state.in_deferred_pragma)
1028 result->type = CPP_PRAGMA_EOL;
1029 pfile->state.in_deferred_pragma = false;
1030 if (!pfile->state.pragma_allow_expansion)
1031 pfile->state.prevent_expansion--;
1032 return result;
1034 if (!_cpp_get_fresh_line (pfile))
1036 result->type = CPP_EOF;
1037 if (!pfile->state.in_directive)
1039 /* Tell the compiler the line number of the EOF token. */
1040 result->src_loc = pfile->line_table->highest_line;
1041 result->flags = BOL;
1043 return result;
1045 if (!pfile->keep_tokens)
1047 pfile->cur_run = &pfile->base_run;
1048 result = pfile->base_run.base;
1049 pfile->cur_token = result + 1;
1051 result->flags = BOL;
1052 if (pfile->state.parsing_args == 2)
1053 result->flags |= PREV_WHITE;
1055 buffer = pfile->buffer;
1056 update_tokens_line:
1057 result->src_loc = pfile->line_table->highest_line;
1059 skipped_white:
1060 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1061 && !pfile->overlaid_buffer)
1063 _cpp_process_line_notes (pfile, false);
1064 result->src_loc = pfile->line_table->highest_line;
1066 c = *buffer->cur++;
1068 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1069 CPP_BUF_COLUMN (buffer, buffer->cur));
1071 switch (c)
1073 case ' ': case '\t': case '\f': case '\v': case '\0':
1074 result->flags |= PREV_WHITE;
1075 skip_whitespace (pfile, c);
1076 goto skipped_white;
1078 case '\n':
1079 if (buffer->cur < buffer->rlimit)
1080 CPP_INCREMENT_LINE (pfile, 0);
1081 buffer->need_line = true;
1082 goto fresh_line;
1084 case '0': case '1': case '2': case '3': case '4':
1085 case '5': case '6': case '7': case '8': case '9':
1087 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1088 result->type = CPP_NUMBER;
1089 lex_number (pfile, &result->val.str, &nst);
1090 warn_about_normalization (pfile, result, &nst);
1091 break;
1094 case 'L':
1095 case 'u':
1096 case 'U':
1097 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1098 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1100 if (*buffer->cur == '\'' || *buffer->cur == '"')
1102 lex_string (pfile, result, buffer->cur - 1);
1103 break;
1106 /* Fall through. */
1108 case '_':
1109 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1110 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1111 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1112 case 's': case 't': case 'v': case 'w': case 'x':
1113 case 'y': case 'z':
1114 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1115 case 'G': case 'H': case 'I': case 'J': case 'K':
1116 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1117 case 'S': case 'T': case 'V': case 'W': case 'X':
1118 case 'Y': case 'Z':
1119 result->type = CPP_NAME;
1121 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1122 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1123 &nst);
1124 warn_about_normalization (pfile, result, &nst);
1127 /* Convert named operators to their proper types. */
1128 if (result->val.node->flags & NODE_OPERATOR)
1130 result->flags |= NAMED_OP;
1131 result->type = (enum cpp_ttype) result->val.node->directive_index;
1133 break;
1135 case '\'':
1136 case '"':
1137 lex_string (pfile, result, buffer->cur - 1);
1138 break;
1140 case '/':
1141 /* A potential block or line comment. */
1142 comment_start = buffer->cur;
1143 c = *buffer->cur;
1145 if (c == '*')
1147 if (_cpp_skip_block_comment (pfile))
1148 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1150 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1151 || cpp_in_system_header (pfile)))
1153 /* Warn about comments only if pedantically GNUC89, and not
1154 in system headers. */
1155 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1156 && ! buffer->warned_cplusplus_comments)
1158 cpp_error (pfile, CPP_DL_PEDWARN,
1159 "C++ style comments are not allowed in ISO C90");
1160 cpp_error (pfile, CPP_DL_PEDWARN,
1161 "(this will be reported only once per input file)");
1162 buffer->warned_cplusplus_comments = 1;
1165 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1166 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1168 else if (c == '=')
1170 buffer->cur++;
1171 result->type = CPP_DIV_EQ;
1172 break;
1174 else
1176 result->type = CPP_DIV;
1177 break;
1180 if (!pfile->state.save_comments)
1182 result->flags |= PREV_WHITE;
1183 goto update_tokens_line;
1186 /* Save the comment as a token in its own right. */
1187 save_comment (pfile, result, comment_start, c);
1188 break;
1190 case '<':
1191 if (pfile->state.angled_headers)
1193 lex_string (pfile, result, buffer->cur - 1);
1194 if (result->type != CPP_LESS)
1195 break;
1198 result->type = CPP_LESS;
1199 if (*buffer->cur == '=')
1200 buffer->cur++, result->type = CPP_LESS_EQ;
1201 else if (*buffer->cur == '<')
1203 buffer->cur++;
1204 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1206 else if (CPP_OPTION (pfile, digraphs))
1208 if (*buffer->cur == ':')
1210 buffer->cur++;
1211 result->flags |= DIGRAPH;
1212 result->type = CPP_OPEN_SQUARE;
1214 else if (*buffer->cur == '%')
1216 buffer->cur++;
1217 result->flags |= DIGRAPH;
1218 result->type = CPP_OPEN_BRACE;
1221 break;
1223 case '>':
1224 result->type = CPP_GREATER;
1225 if (*buffer->cur == '=')
1226 buffer->cur++, result->type = CPP_GREATER_EQ;
1227 else if (*buffer->cur == '>')
1229 buffer->cur++;
1230 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1232 break;
1234 case '%':
1235 result->type = CPP_MOD;
1236 if (*buffer->cur == '=')
1237 buffer->cur++, result->type = CPP_MOD_EQ;
1238 else if (CPP_OPTION (pfile, digraphs))
1240 if (*buffer->cur == ':')
1242 buffer->cur++;
1243 result->flags |= DIGRAPH;
1244 result->type = CPP_HASH;
1245 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1246 buffer->cur += 2, result->type = CPP_PASTE;
1248 else if (*buffer->cur == '>')
1250 buffer->cur++;
1251 result->flags |= DIGRAPH;
1252 result->type = CPP_CLOSE_BRACE;
1255 break;
1257 case '.':
1258 result->type = CPP_DOT;
1259 if (ISDIGIT (*buffer->cur))
1261 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1262 result->type = CPP_NUMBER;
1263 lex_number (pfile, &result->val.str, &nst);
1264 warn_about_normalization (pfile, result, &nst);
1266 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1267 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1268 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1269 buffer->cur++, result->type = CPP_DOT_STAR;
1270 break;
1272 case '+':
1273 result->type = CPP_PLUS;
1274 if (*buffer->cur == '+')
1275 buffer->cur++, result->type = CPP_PLUS_PLUS;
1276 else if (*buffer->cur == '=')
1277 buffer->cur++, result->type = CPP_PLUS_EQ;
1278 break;
1280 case '-':
1281 result->type = CPP_MINUS;
1282 if (*buffer->cur == '>')
1284 buffer->cur++;
1285 result->type = CPP_DEREF;
1286 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1287 buffer->cur++, result->type = CPP_DEREF_STAR;
1289 else if (*buffer->cur == '-')
1290 buffer->cur++, result->type = CPP_MINUS_MINUS;
1291 else if (*buffer->cur == '=')
1292 buffer->cur++, result->type = CPP_MINUS_EQ;
1293 break;
1295 case '&':
1296 result->type = CPP_AND;
1297 if (*buffer->cur == '&')
1298 buffer->cur++, result->type = CPP_AND_AND;
1299 else if (*buffer->cur == '=')
1300 buffer->cur++, result->type = CPP_AND_EQ;
1301 break;
1303 case '|':
1304 result->type = CPP_OR;
1305 if (*buffer->cur == '|')
1306 buffer->cur++, result->type = CPP_OR_OR;
1307 else if (*buffer->cur == '=')
1308 buffer->cur++, result->type = CPP_OR_EQ;
1309 break;
1311 case ':':
1312 result->type = CPP_COLON;
1313 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1314 buffer->cur++, result->type = CPP_SCOPE;
1315 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1317 buffer->cur++;
1318 result->flags |= DIGRAPH;
1319 result->type = CPP_CLOSE_SQUARE;
1321 break;
1323 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1324 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1325 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1326 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1327 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1329 case '?': result->type = CPP_QUERY; break;
1330 case '~': result->type = CPP_COMPL; break;
1331 case ',': result->type = CPP_COMMA; break;
1332 case '(': result->type = CPP_OPEN_PAREN; break;
1333 case ')': result->type = CPP_CLOSE_PAREN; break;
1334 case '[': result->type = CPP_OPEN_SQUARE; break;
1335 case ']': result->type = CPP_CLOSE_SQUARE; break;
1336 case '{': result->type = CPP_OPEN_BRACE; break;
1337 case '}': result->type = CPP_CLOSE_BRACE; break;
1338 case ';': result->type = CPP_SEMICOLON; break;
1340 /* @ is a punctuator in Objective-C. */
1341 case '@': result->type = CPP_ATSIGN; break;
1343 case '$':
1344 case '\\':
1346 const uchar *base = --buffer->cur;
1347 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1349 if (forms_identifier_p (pfile, true, &nst))
1351 result->type = CPP_NAME;
1352 result->val.node = lex_identifier (pfile, base, true, &nst);
1353 warn_about_normalization (pfile, result, &nst);
1354 break;
1356 buffer->cur++;
1359 default:
1360 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1361 break;
1364 return result;
1367 /* An upper bound on the number of bytes needed to spell TOKEN.
1368 Does not include preceding whitespace. */
1369 unsigned int
1370 cpp_token_len (const cpp_token *token)
1372 unsigned int len;
1374 switch (TOKEN_SPELL (token))
1376 default: len = 6; break;
1377 case SPELL_LITERAL: len = token->val.str.len; break;
1378 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1381 return len;
/* Decode the UTF-8 sequence starting at NAME and write it to BUFFER as
   a \U escape: a backslash, 'U' and eight lowercase hex digits, i.e.
   always exactly 10 bytes and no terminating NUL.

   Returns the number of bytes of NAME that made up the sequence.
   Aborts if any byte after the first is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int idx;

  /* The number of leading one bits in the first byte is the length of
     the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload of the first byte is whatever follows its length
     marker; each continuation byte then contributes six more bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (idx = 1; idx < seq_len; idx++)
    {
      unsigned char trail = name[idx];

      /* Ill-formed UTF-8.  */
      if ((trail & ~0x3F) != 0x80)
        abort ();

      code_point = (code_point << 6) | (trail & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (idx = 0; idx < 8; idx++)
    buffer[2 + idx] = hex_digits[(code_point >> (4 * (7 - idx))) & 0xF];

  return seq_len;
}
1419 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1420 already contain the enough space to hold the token's spelling.
1421 Returns a pointer to the character after the last character written.
1422 FORSTRING is true if this is to be the spelling after translation
1423 phase 1 (this is different for UCNs).
1424 FIXME: Would be nice if we didn't need the PFILE argument. */
1425 unsigned char *
1426 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1427 unsigned char *buffer, bool forstring)
1429 switch (TOKEN_SPELL (token))
1431 case SPELL_OPERATOR:
1433 const unsigned char *spelling;
1434 unsigned char c;
1436 if (token->flags & DIGRAPH)
1437 spelling
1438 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1439 else if (token->flags & NAMED_OP)
1440 goto spell_ident;
1441 else
1442 spelling = TOKEN_NAME (token);
1444 while ((c = *spelling++) != '\0')
1445 *buffer++ = c;
1447 break;
1449 spell_ident:
1450 case SPELL_IDENT:
1451 if (forstring)
1453 memcpy (buffer, NODE_NAME (token->val.node),
1454 NODE_LEN (token->val.node));
1455 buffer += NODE_LEN (token->val.node);
1457 else
1459 size_t i;
1460 const unsigned char * name = NODE_NAME (token->val.node);
1462 for (i = 0; i < NODE_LEN (token->val.node); i++)
1463 if (name[i] & ~0x7F)
1465 i += utf8_to_ucn (buffer, name + i) - 1;
1466 buffer += 10;
1468 else
1469 *buffer++ = NODE_NAME (token->val.node)[i];
1471 break;
1473 case SPELL_LITERAL:
1474 memcpy (buffer, token->val.str.text, token->val.str.len);
1475 buffer += token->val.str.len;
1476 break;
1478 case SPELL_NONE:
1479 cpp_error (pfile, CPP_DL_ICE,
1480 "unspellable token %s", TOKEN_NAME (token));
1481 break;
1484 return buffer;
1487 /* Returns TOKEN spelt as a null-terminated string. The string is
1488 freed when the reader is destroyed. Useful for diagnostics. */
1489 unsigned char *
1490 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1492 unsigned int len = cpp_token_len (token) + 1;
1493 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1495 end = cpp_spell_token (pfile, token, start, false);
1496 end[0] = '\0';
1498 return start;
1501 /* Used by C front ends, which really should move to using
1502 cpp_token_as_text. */
1503 const char *
1504 cpp_type2name (enum cpp_ttype type)
1506 return (const char *) token_spellings[type].name;
1509 /* Writes the spelling of token to FP, without any preceding space.
1510 Separated from cpp_spell_token for efficiency - to avoid stdio
1511 double-buffering. */
1512 void
1513 cpp_output_token (const cpp_token *token, FILE *fp)
1515 switch (TOKEN_SPELL (token))
1517 case SPELL_OPERATOR:
1519 const unsigned char *spelling;
1520 int c;
1522 if (token->flags & DIGRAPH)
1523 spelling
1524 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1525 else if (token->flags & NAMED_OP)
1526 goto spell_ident;
1527 else
1528 spelling = TOKEN_NAME (token);
1530 c = *spelling;
1532 putc (c, fp);
1533 while ((c = *++spelling) != '\0');
1535 break;
1537 spell_ident:
1538 case SPELL_IDENT:
1540 size_t i;
1541 const unsigned char * name = NODE_NAME (token->val.node);
1543 for (i = 0; i < NODE_LEN (token->val.node); i++)
1544 if (name[i] & ~0x7F)
1546 unsigned char buffer[10];
1547 i += utf8_to_ucn (buffer, name + i) - 1;
1548 fwrite (buffer, 1, 10, fp);
1550 else
1551 fputc (NODE_NAME (token->val.node)[i], fp);
1553 break;
1555 case SPELL_LITERAL:
1556 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1557 break;
1559 case SPELL_NONE:
1560 /* An error, most probably. */
1561 break;
1565 /* Compare two tokens. */
1567 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1569 if (a->type == b->type && a->flags == b->flags)
1570 switch (TOKEN_SPELL (a))
1572 default: /* Keep compiler happy. */
1573 case SPELL_OPERATOR:
1574 return 1;
1575 case SPELL_NONE:
1576 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1577 case SPELL_IDENT:
1578 return a->val.node == b->val.node;
1579 case SPELL_LITERAL:
1580 return (a->val.str.len == b->val.str.len
1581 && !memcmp (a->val.str.text, b->val.str.text,
1582 a->val.str.len));
1585 return 0;
1588 /* Returns nonzero if a space should be inserted to avoid an
1589 accidental token paste for output. For simplicity, it is
1590 conservative, and occasionally advises a space where one is not
1591 needed, e.g. "." and ".2". */
1593 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1594 const cpp_token *token2)
1596 enum cpp_ttype a = token1->type, b = token2->type;
1597 cppchar_t c;
1599 if (token1->flags & NAMED_OP)
1600 a = CPP_NAME;
1601 if (token2->flags & NAMED_OP)
1602 b = CPP_NAME;
1604 c = EOF;
1605 if (token2->flags & DIGRAPH)
1606 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1607 else if (token_spellings[b].category == SPELL_OPERATOR)
1608 c = token_spellings[b].name[0];
1610 /* Quickly get everything that can paste with an '='. */
1611 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1612 return 1;
1614 switch (a)
1616 case CPP_GREATER: return c == '>';
1617 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1618 case CPP_PLUS: return c == '+';
1619 case CPP_MINUS: return c == '-' || c == '>';
1620 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1621 case CPP_MOD: return c == ':' || c == '>';
1622 case CPP_AND: return c == '&';
1623 case CPP_OR: return c == '|';
1624 case CPP_COLON: return c == ':' || c == '>';
1625 case CPP_DEREF: return c == '*';
1626 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1627 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1628 case CPP_NAME: return ((b == CPP_NUMBER
1629 && name_p (pfile, &token2->val.str))
1630 || b == CPP_NAME
1631 || b == CPP_CHAR || b == CPP_STRING); /* L */
1632 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1633 || c == '.' || c == '+' || c == '-');
1634 /* UCNs */
1635 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1636 && b == CPP_NAME)
1637 || (CPP_OPTION (pfile, objc)
1638 && token1->val.str.text[0] == '@'
1639 && (b == CPP_NAME || b == CPP_STRING)));
1640 default: break;
1643 return 0;
1646 /* Output all the remaining tokens on the current line, and a newline
1647 character, to FP. Leading whitespace is removed. If there are
1648 macros, special token padding is not performed. */
1649 void
1650 cpp_output_line (cpp_reader *pfile, FILE *fp)
1652 const cpp_token *token;
1654 token = cpp_get_token (pfile);
1655 while (token->type != CPP_EOF)
1657 cpp_output_token (token, fp);
1658 token = cpp_get_token (pfile);
1659 if (token->flags & PREV_WHITE)
1660 putc (' ', fp);
1663 putc ('\n', fp);
1666 /* Return a string representation of all the remaining tokens on the
1667 current line. The result is allocated using xmalloc and must be
1668 freed by the caller. */
1669 unsigned char *
1670 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1672 const cpp_token *token;
1673 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1674 unsigned int alloced = 120 + out;
1675 unsigned char *result = (unsigned char *) xmalloc (alloced);
1677 /* If DIR_NAME is empty, there are no initial contents. */
1678 if (dir_name)
1680 sprintf ((char *) result, "#%s ", dir_name);
1681 out += 2;
1684 token = cpp_get_token (pfile);
1685 while (token->type != CPP_EOF)
1687 unsigned char *last;
1688 /* Include room for a possible space and the terminating nul. */
1689 unsigned int len = cpp_token_len (token) + 2;
1691 if (out + len > alloced)
1693 alloced *= 2;
1694 if (out + len > alloced)
1695 alloced = out + len;
1696 result = (unsigned char *) xrealloc (result, alloced);
1699 last = cpp_spell_token (pfile, token, &result[out], 0);
1700 out = last - result;
1702 token = cpp_get_token (pfile);
1703 if (token->flags & PREV_WHITE)
1704 result[out++] = ' ';
1707 result[out] = '\0';
1708 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused to satisfy a request for MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.

   Every macro argument is parenthesized so that any expression can be
   passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1726 /* Create a new allocation buffer. Place the control block at the end
1727 of the buffer, so that buffer overflows will cause immediate chaos. */
1728 static _cpp_buff *
1729 new_buff (size_t len)
1731 _cpp_buff *result;
1732 unsigned char *base;
1734 if (len < MIN_BUFF_SIZE)
1735 len = MIN_BUFF_SIZE;
1736 len = CPP_ALIGN (len);
1738 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1739 result = (_cpp_buff *) (base + len);
1740 result->base = base;
1741 result->cur = base;
1742 result->limit = base + len;
1743 result->next = NULL;
1744 return result;
1747 /* Place a chain of unwanted allocation buffers on the free list. */
1748 void
1749 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1751 _cpp_buff *end = buff;
1753 while (end->next)
1754 end = end->next;
1755 end->next = pfile->free_buffs;
1756 pfile->free_buffs = buff;
1759 /* Return a free buffer of size at least MIN_SIZE. */
1760 _cpp_buff *
1761 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1763 _cpp_buff *result, **p;
1765 for (p = &pfile->free_buffs;; p = &(*p)->next)
1767 size_t size;
1769 if (*p == NULL)
1770 return new_buff (min_size);
1771 result = *p;
1772 size = result->limit - result->base;
1773 /* Return a buffer that's big enough, but don't waste one that's
1774 way too big. */
1775 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1776 break;
1779 *p = result->next;
1780 result->next = NULL;
1781 result->cur = result->base;
1782 return result;
1785 /* Creates a new buffer with enough space to hold the uncommitted
1786 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1787 the excess bytes to the new buffer. Chains the new buffer after
1788 BUFF, and returns the new buffer. */
1789 _cpp_buff *
1790 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1792 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1793 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1795 buff->next = new_buff;
1796 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1797 return new_buff;
1800 /* Creates a new buffer with enough space to hold the uncommitted
1801 remaining bytes of the buffer pointed to by BUFF, and at least
1802 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1803 Chains the new buffer before the buffer pointed to by BUFF, and
1804 updates the pointer to point to the new buffer. */
1805 void
1806 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1808 _cpp_buff *new_buff, *old_buff = *pbuff;
1809 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1811 new_buff = _cpp_get_buff (pfile, size);
1812 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1813 new_buff->next = old_buff;
1814 *pbuff = new_buff;
1817 /* Free a chain of buffers starting at BUFF. */
1818 void
1819 _cpp_free_buff (_cpp_buff *buff)
1821 _cpp_buff *next;
1823 for (; buff; buff = next)
1825 next = buff->next;
1826 free (buff->base);
1830 /* Allocate permanent, unaligned storage of length LEN. */
1831 unsigned char *
1832 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1834 _cpp_buff *buff = pfile->u_buff;
1835 unsigned char *result = buff->cur;
1837 if (len > (size_t) (buff->limit - result))
1839 buff = _cpp_get_buff (pfile, len);
1840 buff->next = pfile->u_buff;
1841 pfile->u_buff = buff;
1842 result = buff->cur;
1845 buff->cur = result + len;
1846 return result;
1849 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1850 That buffer is used for growing allocations when saving macro
1851 replacement lists in a #define, and when parsing an answer to an
1852 assertion in #assert, #unassert or #if (and therefore possibly
1853 whilst expanding macros). It therefore must not be used by any
1854 code that they might call: specifically the lexer and the guts of
1855 the macro expander.
1857 All existing other uses clearly fit this restriction: storing
1858 registered pragmas during initialization. */
1859 unsigned char *
1860 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1862 _cpp_buff *buff = pfile->a_buff;
1863 unsigned char *result = buff->cur;
1865 if (len > (size_t) (buff->limit - result))
1867 buff = _cpp_get_buff (pfile, len);
1868 buff->next = pfile->a_buff;
1869 pfile->a_buff = buff;
1870 result = buff->cur;
1873 buff->cur = result + len;
1874 return result;
1877 /* Say which field of TOK is in use. */
1879 enum cpp_token_fld_kind
1880 cpp_token_val_index (cpp_token *tok)
1882 switch (TOKEN_SPELL (tok))
1884 case SPELL_IDENT:
1885 return CPP_TOKEN_FLD_NODE;
1886 case SPELL_LITERAL:
1887 return CPP_TOKEN_FLD_STR;
1888 case SPELL_NONE:
1889 if (tok->type == CPP_MACRO_ARG)
1890 return CPP_TOKEN_FLD_ARG_NO;
1891 else if (tok->type == CPP_PADDING)
1892 return CPP_TOKEN_FLD_SOURCE;
1893 else if (tok->type == CPP_PRAGMA)
1894 return CPP_TOKEN_FLD_PRAGMA;
1895 /* else fall through */
1896 default:
1897 return CPP_TOKEN_FLD_NONE;