2008-08-06 Marc Gauthier <marc@tensilica.com>
[official-gcc.git] / libcpp / lex.c
blob2eb66bd63420f3338353bf5921cd53f410c5c076
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* Spelling categories for token types.  Each entry of the
   token_spellings table carries one of these; the OP macro assigns
   SPELL_OPERATOR and the TK macro pastes the category name after
   "SPELL_".  */
27 enum spell_type
29 SPELL_OPERATOR = 0,
30 SPELL_IDENT,
31 SPELL_LITERAL,
32 SPELL_NONE
/* One row of the token_spellings table: the spelling category of a
   token type and a name string for it.  */
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
/* Spellings of the six digraph forms.  NOTE(review): the code that
   indexes this table is outside this excerpt; confirm the ordering
   requirement against it before reordering entries.  */
41 static const unsigned char *const digraph_spellings[] =
42 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
/* Build the per-token-type spelling table by expanding TTYPE_TABLE
   with OP and TK temporarily defined: OP entries get SPELL_OPERATOR
   and the string S, TK entries get category SPELL_<S> and the
   stringified enumerator name E.  */
44 #define OP(e, s) { SPELL_OPERATOR, UC s },
45 #define TK(e, s) { SPELL_ ## s, UC #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
/* Look up the spelling category / name string of TOKEN by its type.  */
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61 static int name_p (cpp_reader *, const cpp_string *);
62 static tokenrun *next_tokenrun (tokenrun *);
64 static _cpp_buff *new_buff (size_t);
67 /* Utility routine:
69 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
71 int
72 cpp_ideq (const cpp_token *token, const char *string)
74 if (token->type != CPP_NAME)
75 return 0;
77 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
80 /* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
82 static void
83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
85 if (buffer->notes_used == buffer->notes_cap)
87 buffer->notes_cap = buffer->notes_cap * 2 + 200;
88 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
92 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
97 /* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
/* Starting at buffer->next_line, this scans to the end of the physical
   line.  Backslash-newline pairs are spliced out and, when the
   trigraphs option is set, trigraphs are replaced in place; a line
   note is recorded for each splice and for each trigraph found.  On
   exit the logical line ends in '\n', a sentinel note has been added,
   and buffer->next_line points one past the last byte consumed.  For
   buffers marked from_stage3 only the end of the line is located.  */
99 void
100 _cpp_clean_line (cpp_reader *pfile)
102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
/* Start one byte early: the scanning loops advance S before reading.  */
110 s = buffer->next_line - 1;
112 if (!buffer->from_stage3)
114 const uchar *pbackslash = NULL;
116 /* Short circuit for the common case of an un-escaped line with
117 no trigraphs. The primary win here is by not writing any
118 data back to memory until we have to. */
119 for (;;)
121 c = *++s;
122 if (__builtin_expect (c == '\n', false)
123 || __builtin_expect (c == '\r', false))
125 d = (uchar *) s;
127 if (__builtin_expect (s == buffer->rlimit, false))
128 goto done;
130 /* DOS line ending? */
131 if (__builtin_expect (c == '\r', false)
132 && s[1] == '\n')
134 s++;
135 if (s == buffer->rlimit)
136 goto done;
/* No backslash seen on this line, so the newline cannot be escaped.  */
139 if (__builtin_expect (pbackslash == NULL, true))
140 goto done;
142 /* Check for escaped newline. */
143 p = d;
144 while (is_nvspace (p[-1]))
145 p--;
146 if (p - 1 != pbackslash)
147 goto done;
149 /* Have an escaped newline; process it and proceed to
150 the slow path. */
/* The note type is ' ' when space separated the backslash from the
   newline, and '\\' otherwise.  */
151 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
152 d = p - 2;
153 buffer->next_line = p - 1;
154 break;
/* Remember the most recent backslash for the escaped-newline test.  */
156 if (__builtin_expect (c == '\\', false))
157 pbackslash = s;
158 else if (__builtin_expect (c == '?', false)
159 && __builtin_expect (s[1] == '?', false)
160 && _cpp_trigraph_map[s[2]])
162 /* Have a trigraph. We may or may not have to convert
163 it. Add a line note regardless, for -Wtrigraphs. */
164 add_line_note (buffer, s, s[2]);
165 if (CPP_OPTION (pfile, trigraphs))
167 /* We do, and that means we have to switch to the
168 slow path. */
169 d = (uchar *) s;
170 *d = _cpp_trigraph_map[s[2]];
171 s += 2;
172 break;
/* Slow path: each byte read through S is stored through D, which
   trails S once a splice or a converted trigraph has removed bytes.  */
178 for (;;)
180 c = *++s;
181 *++d = c;
183 if (c == '\n' || c == '\r')
185 /* Handle DOS line endings. */
186 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
187 s++;
188 if (s == buffer->rlimit)
189 break;
191 /* Escaped? */
192 p = d;
193 while (p != buffer->next_line && is_nvspace (p[-1]))
194 p--;
195 if (p == buffer->next_line || p[-1] != '\\')
196 break;
198 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
199 d = p - 2;
200 buffer->next_line = p - 1;
202 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
204 /* Add a note regardless, for the benefit of -Wtrigraphs. */
205 add_line_note (buffer, d, s[2]);
206 if (CPP_OPTION (pfile, trigraphs))
208 *d = _cpp_trigraph_map[s[2]];
209 s += 2;
/* from_stage3 buffers: no splicing or trigraph handling, just find
   the end of the line.  */
214 else
217 s++;
218 while (*s != '\n' && *s != '\r');
219 d = (uchar *) s;
221 /* Handle DOS line endings. */
222 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
223 s++;
226 done:
/* Terminate the logical line with '\n', whatever line ending was found.  */
227 *d = '\n';
228 /* A sentinel note that should never be processed. */
229 add_line_note (buffer, d + 1, '\n');
230 buffer->next_line = s + 1;
233 /* Return true if the trigraph indicated by NOTE should be warned
234 about in a comment. */
235 static bool
236 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
238 const uchar *p;
240 /* Within comments we don't warn about trigraphs, unless the
241 trigraph forms an escaped newline, as that may change
242 behavior. */
243 if (note->type != '/')
244 return false;
246 /* If -trigraphs, then this was an escaped newline iff the next note
247 is coincident. */
248 if (CPP_OPTION (pfile, trigraphs))
249 return note[1].pos == note->pos;
251 /* Otherwise, see if this forms an escaped newline. */
252 p = note->pos + 3;
253 while (is_nvspace (*p))
254 p++;
256 /* There might have been escaped newlines between the trigraph and the
257 newline we found. Hence the position test. */
258 return (*p == '\n' && p < note[1].pos);
261 /* Process the notes created by add_line_note as far as the current
262 location. */
263 void
264 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
266 cpp_buffer *buffer = pfile->buffer;
268 for (;;)
270 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
271 unsigned int col;
273 if (note->pos > buffer->cur)
274 break;
276 buffer->cur_note++;
277 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
279 if (note->type == '\\' || note->type == ' ')
281 if (note->type == ' ' && !in_comment)
282 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
283 "backslash and newline separated by space");
285 if (buffer->next_line > buffer->rlimit)
287 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
288 "backslash-newline at end of file");
289 /* Prevent "no newline at end of file" warning. */
290 buffer->next_line = buffer->rlimit;
293 buffer->line_base = note->pos;
294 CPP_INCREMENT_LINE (pfile, 0);
296 else if (_cpp_trigraph_map[note->type])
298 if (CPP_OPTION (pfile, warn_trigraphs)
299 && (!in_comment || warn_in_comment (pfile, note)))
301 if (CPP_OPTION (pfile, trigraphs))
302 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
303 "trigraph ??%c converted to %c",
304 note->type,
305 (int) _cpp_trigraph_map[note->type]);
306 else
308 cpp_error_with_line
309 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
310 "trigraph ??%c ignored, use -trigraphs to enable",
311 note->type);
315 else
316 abort ();
320 /* Skip a C-style block comment. We find the end of the comment by
321 seeing if an asterisk is before every '/' we encounter. Returns
322 nonzero if comment terminated by EOF, zero otherwise.
324 Buffer->cur points to the initial asterisk of the comment. */
325 bool
326 _cpp_skip_block_comment (cpp_reader *pfile)
328 cpp_buffer *buffer = pfile->buffer;
329 const uchar *cur = buffer->cur;
330 uchar c;
332 cur++;
333 if (*cur == '/')
334 cur++;
336 for (;;)
338 /* People like decorating comments with '*', so check for '/'
339 instead for efficiency. */
340 c = *cur++;
342 if (c == '/')
344 if (cur[-2] == '*')
345 break;
347 /* Warn about potential nested comments, but not if the '/'
348 comes immediately before the true comment delimiter.
349 Don't bother to get it right across escaped newlines. */
350 if (CPP_OPTION (pfile, warn_comments)
351 && cur[0] == '*' && cur[1] != '/')
353 buffer->cur = cur;
354 cpp_error_with_line (pfile, CPP_DL_WARNING,
355 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
356 "\"/*\" within comment");
359 else if (c == '\n')
361 unsigned int cols;
362 buffer->cur = cur - 1;
363 _cpp_process_line_notes (pfile, true);
364 if (buffer->next_line >= buffer->rlimit)
365 return true;
366 _cpp_clean_line (pfile);
368 cols = buffer->next_line - buffer->line_base;
369 CPP_INCREMENT_LINE (pfile, cols);
371 cur = buffer->cur;
375 buffer->cur = cur;
376 _cpp_process_line_notes (pfile, true);
377 return false;
380 /* Skip a C++ line comment, leaving buffer->cur pointing to the
381 terminating newline. Handles escaped newlines. Returns nonzero
382 if a multiline comment. */
383 static int
384 skip_line_comment (cpp_reader *pfile)
386 cpp_buffer *buffer = pfile->buffer;
387 source_location orig_line = pfile->line_table->highest_line;
389 while (*buffer->cur != '\n')
390 buffer->cur++;
392 _cpp_process_line_notes (pfile, true);
393 return orig_line != pfile->line_table->highest_line;
396 /* Skips whitespace, saving the next non-whitespace character. */
397 static void
398 skip_whitespace (cpp_reader *pfile, cppchar_t c)
400 cpp_buffer *buffer = pfile->buffer;
401 bool saw_NUL = false;
405 /* Horizontal space always OK. */
406 if (c == ' ' || c == '\t')
408 /* Just \f \v or \0 left. */
409 else if (c == '\0')
410 saw_NUL = true;
411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
417 c = *buffer->cur++;
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c));
422 if (saw_NUL)
423 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
425 buffer->cur--;
428 /* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430 static int
431 name_p (cpp_reader *pfile, const cpp_string *string)
433 unsigned int i;
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
437 return 0;
439 return 1;
442 /* After parsing an identifier or other sequence, produce a warning about
443 sequences not in NFC/NFKC. */
444 static void
445 warn_about_normalization (cpp_reader *pfile,
446 const cpp_token *token,
447 const struct normalize_state *s)
449 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
450 && !pfile->state.skipping)
452 /* Make sure that the token is printed using UCNs, even
453 if we'd otherwise happily print UTF-8. */
454 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
455 size_t sz;
457 sz = cpp_spell_token (pfile, token, buf, false) - buf;
458 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
459 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
460 "`%.*s' is not in NFKC", (int) sz, buf);
461 else
462 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
463 "`%.*s' is not in NFC", (int) sz, buf);
467 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
468 an identifier. FIRST is TRUE if this starts an identifier. */
469 static bool
470 forms_identifier_p (cpp_reader *pfile, int first,
471 struct normalize_state *state)
473 cpp_buffer *buffer = pfile->buffer;
475 if (*buffer->cur == '$')
477 if (!CPP_OPTION (pfile, dollars_in_ident))
478 return false;
480 buffer->cur++;
481 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
483 CPP_OPTION (pfile, warn_dollars) = 0;
484 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
487 return true;
490 /* Is this a syntactically valid UCN? */
491 if (CPP_OPTION (pfile, extended_identifiers)
492 && *buffer->cur == '\\'
493 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
495 buffer->cur += 2;
496 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
497 state))
498 return true;
499 buffer->cur -= 2;
502 return false;
505 /* Lex an identifier starting at BUFFER->CUR - 1. */
/* BASE points at the first character of the identifier, which also
   seeds the hash.  STARTS_UCN forces the slow path (presumably the
   identifier begins with a UCN -- confirm against the callers).  NST
   is updated with normalization state on the slow path.  Returns the
   hash node for the identifier, after issuing any diagnostics the
   node is flagged for.  */
506 static cpp_hashnode *
507 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
508 struct normalize_state *nst)
510 cpp_hashnode *result;
511 const uchar *cur;
512 unsigned int len;
513 unsigned int hash = HT_HASHSTEP (0, *base);
515 cur = pfile->buffer->cur;
/* Fast path: consume plain identifier characters, hashing as we go.  */
516 if (! starts_ucn)
517 while (ISIDNUM (*cur))
519 hash = HT_HASHSTEP (hash, *cur);
520 cur++;
522 pfile->buffer->cur = cur;
523 if (starts_ucn || forms_identifier_p (pfile, false, nst))
525 /* Slower version for identifiers containing UCNs (or $). */
526 do {
527 while (ISIDNUM (*pfile->buffer->cur))
529 pfile->buffer->cur++;
530 NORMALIZE_STATE_UPDATE_IDNUM (nst);
532 } while (forms_identifier_p (pfile, false, nst));
/* The hash accumulated above is not used on this path; the whole
   spelling is handed to _cpp_interpret_identifier instead.  */
533 result = _cpp_interpret_identifier (pfile, base,
534 pfile->buffer->cur - base);
536 else
538 len = cur - base;
539 hash = HT_HASHFINISH (hash, len);
541 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
542 base, len, hash, HT_ALLOC));
545 /* Rarely, identifiers require diagnostics when lexed. */
546 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
547 && !pfile->state.skipping, 0))
549 /* It is allowed to poison the same identifier twice. */
550 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
551 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
552 NODE_NAME (result));
554 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
555 replacement list of a variadic macro. */
556 if (result == pfile->spec_nodes.n__VA_ARGS__
557 && !pfile->state.va_args_ok)
558 cpp_error (pfile, CPP_DL_PEDWARN,
559 "__VA_ARGS__ can only appear in the expansion"
560 " of a C99 variadic macro");
563 return result;
566 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
567 static void
568 lex_number (cpp_reader *pfile, cpp_string *number,
569 struct normalize_state *nst)
571 const uchar *cur;
572 const uchar *base;
573 uchar *dest;
575 base = pfile->buffer->cur - 1;
578 cur = pfile->buffer->cur;
580 /* N.B. ISIDNUM does not include $. */
581 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
583 cur++;
584 NORMALIZE_STATE_UPDATE_IDNUM (nst);
587 pfile->buffer->cur = cur;
589 while (forms_identifier_p (pfile, false, nst));
591 number->len = cur - base;
592 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
593 memcpy (dest, base, number->len);
594 dest[number->len] = '\0';
595 number->text = dest;
598 /* Create a token of type TYPE with a literal spelling. */
599 static void
600 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
601 unsigned int len, enum cpp_ttype type)
603 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
605 memcpy (dest, base, len);
606 dest[len] = '\0';
607 token->type = type;
608 token->val.str.len = len;
609 token->val.str.text = dest;
612 /* Lexes a string, character constant, or angle-bracketed header file
613 name. The stored string contains the spelling, including opening
614 quote and leading any leading 'L', 'u' or 'U'. It returns the type
615 of the literal, or CPP_OTHER if it was not properly terminated.
617 The spelling is NUL-terminated, but it is not guaranteed that this
618 is the first NUL since embedded NULs are preserved. */
619 static void
620 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
622 bool saw_NUL = false;
623 const uchar *cur;
624 cppchar_t terminator;
625 enum cpp_ttype type;
627 cur = base;
628 terminator = *cur++;
629 if (terminator == 'L' || terminator == 'u' || terminator == 'U')
630 terminator = *cur++;
631 if (terminator == '\"')
632 type = (*base == 'L' ? CPP_WSTRING :
633 *base == 'U' ? CPP_STRING32 :
634 *base == 'u' ? CPP_STRING16 : CPP_STRING);
635 else if (terminator == '\'')
636 type = (*base == 'L' ? CPP_WCHAR :
637 *base == 'U' ? CPP_CHAR32 :
638 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
639 else
640 terminator = '>', type = CPP_HEADER_NAME;
642 for (;;)
644 cppchar_t c = *cur++;
646 /* In #include-style directives, terminators are not escapable. */
647 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
648 cur++;
649 else if (c == terminator)
650 break;
651 else if (c == '\n')
653 cur--;
654 type = CPP_OTHER;
655 break;
657 else if (c == '\0')
658 saw_NUL = true;
661 if (saw_NUL && !pfile->state.skipping)
662 cpp_error (pfile, CPP_DL_WARNING,
663 "null character(s) preserved in literal");
665 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
666 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
667 (int) terminator);
669 pfile->buffer->cur = cur;
670 create_literal (pfile, token, base, cur - base, type);
673 /* The stored comment includes the comment start and any terminator. */
674 static void
675 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
676 cppchar_t type)
678 unsigned char *buffer;
679 unsigned int len, clen;
681 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
683 /* C++ comments probably (not definitely) have moved past a new
684 line, which we don't want to save in the comment. */
685 if (is_vspace (pfile->buffer->cur[-1]))
686 len--;
688 /* If we are currently in a directive, then we need to store all
689 C++ comments as C comments internally, and so we need to
690 allocate a little extra space in that case.
692 Note that the only time we encounter a directive here is
693 when we are saving comments in a "#define". */
694 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
696 buffer = _cpp_unaligned_alloc (pfile, clen);
698 token->type = CPP_COMMENT;
699 token->val.str.len = clen;
700 token->val.str.text = buffer;
702 buffer[0] = '/';
703 memcpy (buffer + 1, from, len - 1);
705 /* Finish conversion to a C comment, if necessary. */
706 if (pfile->state.in_directive && type == '/')
708 buffer[1] = '*';
709 buffer[clen - 2] = '*';
710 buffer[clen - 1] = '/';
714 /* Allocate COUNT tokens for RUN. */
715 void
716 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
718 run->base = XNEWVEC (cpp_token, count);
719 run->limit = run->base + count;
720 run->next = NULL;
723 /* Returns the next tokenrun, or creates one if there is none. */
724 static tokenrun *
725 next_tokenrun (tokenrun *run)
727 if (run->next == NULL)
729 run->next = XNEW (tokenrun);
730 run->next->prev = run;
731 _cpp_init_tokenrun (run->next, 250);
734 return run->next;
737 /* Look ahead in the input stream. */
738 const cpp_token *
739 cpp_peek_token (cpp_reader *pfile, int index)
741 cpp_context *context = pfile->context;
742 const cpp_token *peektok;
743 int count;
745 /* First, scan through any pending cpp_context objects. */
746 while (context->prev)
748 ptrdiff_t sz = (context->direct_p
749 ? LAST (context).token - FIRST (context).token
750 : LAST (context).ptoken - FIRST (context).ptoken);
752 if (index < (int) sz)
753 return (context->direct_p
754 ? FIRST (context).token + index
755 : *(FIRST (context).ptoken + index));
757 index -= (int) sz;
758 context = context->prev;
761 /* We will have to read some new tokens after all (and do so
762 without invalidating preceding tokens). */
763 count = index;
764 pfile->keep_tokens++;
768 peektok = _cpp_lex_token (pfile);
769 if (peektok->type == CPP_EOF)
770 return peektok;
772 while (index--);
774 _cpp_backup_tokens_direct (pfile, count + 1);
775 pfile->keep_tokens--;
777 return peektok;
780 /* Allocate a single token that is invalidated at the same time as the
781 rest of the tokens on the line. Has its line and col set to the
782 same as the last lexed token, so that diagnostics appear in the
783 right place. */
784 cpp_token *
785 _cpp_temp_token (cpp_reader *pfile)
787 cpp_token *old, *result;
/* SZ is the number of token slots left in the current run from
   cur_token onward; LA is the number of pending lookahead tokens.  */
788 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
789 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
/* OLD is the slot just before cur_token; its src_loc is copied into
   the new token at the end.  */
791 old = pfile->cur_token - 1;
792 /* Any pre-existing lookaheads must not be clobbered. */
/* The lookaheads are shifted up by one slot so that the slot at
   cur_token can be handed out.  */
793 if (la)
/* The shifted lookaheads do not all fit in this run, so the last
   slot of this run moves to the start of the next run.  */
795 if (sz <= la)
797 tokenrun *next = next_tokenrun (pfile->cur_run);
/* Lookaheads that already sit in the next run move up by one first.  */
799 if (sz < la)
800 memmove (next->base + 1, next->base,
801 (la - sz) * sizeof (cpp_token));
803 next->base[0] = pfile->cur_run->limit[-1];
/* Shift the lookaheads that stay within the current run.  */
806 if (sz > 1)
807 memmove (pfile->cur_token + 1, pfile->cur_token,
808 MIN (la, sz - 1) * sizeof (cpp_token));
/* No room left in the current run: continue in the next one.  */
811 if (!sz && pfile->cur_token == pfile->cur_run->limit)
813 pfile->cur_run = next_tokenrun (pfile->cur_run);
814 pfile->cur_token = pfile->cur_run->base;
817 result = pfile->cur_token++;
818 result->src_loc = old->src_loc;
819 return result;
822 /* Lex a token into RESULT (external interface). Takes care of issues
823 like directive handling, token lookahead, multiple include
824 optimization and skipping. */
/* Returns the next token to hand to the caller.  Tokens come from
   pending lookaheads when there are any, otherwise from
   _cpp_lex_direct.  The loop keeps going while tokens are being
   skipped, except inside a directive or deferred pragma and at
   CPP_EOF.  */
825 const cpp_token *
826 _cpp_lex_token (cpp_reader *pfile)
828 cpp_token *result;
830 for (;;)
/* Move on to the next token run once the current one is used up.  */
832 if (pfile->cur_token == pfile->cur_run->limit)
834 pfile->cur_run = next_tokenrun (pfile->cur_run);
835 pfile->cur_token = pfile->cur_run->base;
837 /* We assume that the current token is somewhere in the current
838 run. */
839 if (pfile->cur_token < pfile->cur_run->base
840 || pfile->cur_token >= pfile->cur_run->limit)
841 abort ();
/* A pending lookahead is a token that was already lexed; reuse it.  */
843 if (pfile->lookaheads)
845 pfile->lookaheads--;
846 result = pfile->cur_token++;
848 else
849 result = _cpp_lex_direct (pfile);
/* Only tokens flagged BOL are considered as directive starts.  */
851 if (result->flags & BOL)
853 /* Is this a directive. If _cpp_handle_directive returns
854 false, it is an assembler #. */
855 if (result->type == CPP_HASH
856 /* 6.10.3 p 11: Directives in a list of macro arguments
857 gives undefined behavior. This implementation
858 handles the directive as normal. */
859 && pfile->state.parsing_args != 1)
861 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
/* A CPP_PADDING directive result yields no token for the caller,
   so lex another; otherwise return the directive's result.  */
863 if (pfile->directive_result.type == CPP_PADDING)
864 continue;
865 result = &pfile->directive_result;
868 else if (pfile->state.in_deferred_pragma)
869 result = &pfile->directive_result;
871 if (pfile->cb.line_change && !pfile->state.skipping)
872 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
875 /* We don't skip tokens in directives. */
876 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
877 break;
879 /* Outside a directive, invalidate controlling macros. At file
880 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
881 get here and MI optimization works. */
882 pfile->mi_valid = false;
884 if (!pfile->state.skipping || result->type == CPP_EOF)
885 break;
888 return result;
891 /* Returns true if a fresh line has been loaded. */
892 bool
893 _cpp_get_fresh_line (cpp_reader *pfile)
895 int return_at_eof;
897 /* We can't get a new line until we leave the current directive. */
898 if (pfile->state.in_directive)
899 return false;
901 for (;;)
903 cpp_buffer *buffer = pfile->buffer;
905 if (!buffer->need_line)
906 return true;
908 if (buffer->next_line < buffer->rlimit)
910 _cpp_clean_line (pfile);
911 return true;
914 /* First, get out of parsing arguments state. */
915 if (pfile->state.parsing_args)
916 return false;
918 /* End of buffer. Non-empty files should end in a newline. */
919 if (buffer->buf != buffer->rlimit
920 && buffer->next_line > buffer->rlimit
921 && !buffer->from_stage3)
923 /* Clip to buffer size. */
924 buffer->next_line = buffer->rlimit;
927 return_at_eof = buffer->return_at_eof;
928 _cpp_pop_buffer (pfile);
929 if (pfile->buffer == NULL || return_at_eof)
930 return false;
/* Set result->type to THEN_TYPE and consume one character if the
   next character in the buffer is CHAR; otherwise set it to
   ELSE_TYPE.  Relies on variables named `result' and `buffer' being
   in scope at the point of use.  */
934 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
935 do \
937 result->type = ELSE_TYPE; \
938 if (*buffer->cur == CHAR) \
939 buffer->cur++, result->type = THEN_TYPE; \
941 while (0)
943 /* Lex a token into pfile->cur_token, which is also incremented, to
944 get diagnostics pointing to the correct location.
946 Does not handle issues such as token lookahead, multiple-include
947 optimization, directives, skipping etc. This function is only
948 suitable for use by _cpp_lex_token, and in special cases like
949 lex_expansion_token which doesn't care for any of these issues.
951 When meeting a newline, returns CPP_EOF if parsing a directive,
952 otherwise returns to the start of the token buffer if permissible.
953 Returns the location of the lexed token. */
954 cpp_token *
955 _cpp_lex_direct (cpp_reader *pfile)
957 cppchar_t c;
958 cpp_buffer *buffer;
959 const unsigned char *comment_start;
960 cpp_token *result = pfile->cur_token++;
962 fresh_line:
963 result->flags = 0;
964 buffer = pfile->buffer;
965 if (buffer->need_line)
967 if (pfile->state.in_deferred_pragma)
969 result->type = CPP_PRAGMA_EOL;
970 pfile->state.in_deferred_pragma = false;
971 if (!pfile->state.pragma_allow_expansion)
972 pfile->state.prevent_expansion--;
973 return result;
975 if (!_cpp_get_fresh_line (pfile))
977 result->type = CPP_EOF;
978 if (!pfile->state.in_directive)
980 /* Tell the compiler the line number of the EOF token. */
981 result->src_loc = pfile->line_table->highest_line;
982 result->flags = BOL;
984 return result;
986 if (!pfile->keep_tokens)
988 pfile->cur_run = &pfile->base_run;
989 result = pfile->base_run.base;
990 pfile->cur_token = result + 1;
992 result->flags = BOL;
993 if (pfile->state.parsing_args == 2)
994 result->flags |= PREV_WHITE;
996 buffer = pfile->buffer;
997 update_tokens_line:
998 result->src_loc = pfile->line_table->highest_line;
1000 skipped_white:
1001 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1002 && !pfile->overlaid_buffer)
1004 _cpp_process_line_notes (pfile, false);
1005 result->src_loc = pfile->line_table->highest_line;
1007 c = *buffer->cur++;
1009 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1010 CPP_BUF_COLUMN (buffer, buffer->cur));
1012 switch (c)
1014 case ' ': case '\t': case '\f': case '\v': case '\0':
1015 result->flags |= PREV_WHITE;
1016 skip_whitespace (pfile, c);
1017 goto skipped_white;
1019 case '\n':
1020 if (buffer->cur < buffer->rlimit)
1021 CPP_INCREMENT_LINE (pfile, 0);
1022 buffer->need_line = true;
1023 goto fresh_line;
1025 case '0': case '1': case '2': case '3': case '4':
1026 case '5': case '6': case '7': case '8': case '9':
1028 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1029 result->type = CPP_NUMBER;
1030 lex_number (pfile, &result->val.str, &nst);
1031 warn_about_normalization (pfile, result, &nst);
1032 break;
1035 case 'L':
1036 case 'u':
1037 case 'U':
1038 /* 'L', 'u' or 'U' may introduce wide characters or strings. */
1039 if (c == 'L' || CPP_OPTION (pfile, uliterals))
1041 if (*buffer->cur == '\'' || *buffer->cur == '"')
1043 lex_string (pfile, result, buffer->cur - 1);
1044 break;
1047 /* Fall through. */
1049 case '_':
1050 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1051 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1052 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1053 case 's': case 't': case 'v': case 'w': case 'x':
1054 case 'y': case 'z':
1055 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1056 case 'G': case 'H': case 'I': case 'J': case 'K':
1057 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1058 case 'S': case 'T': case 'V': case 'W': case 'X':
1059 case 'Y': case 'Z':
1060 result->type = CPP_NAME;
1062 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1063 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
1064 &nst);
1065 warn_about_normalization (pfile, result, &nst);
1068 /* Convert named operators to their proper types. */
1069 if (result->val.node->flags & NODE_OPERATOR)
1071 result->flags |= NAMED_OP;
1072 result->type = (enum cpp_ttype) result->val.node->directive_index;
1074 break;
1076 case '\'':
1077 case '"':
1078 lex_string (pfile, result, buffer->cur - 1);
1079 break;
1081 case '/':
1082 /* A potential block or line comment. */
1083 comment_start = buffer->cur;
1084 c = *buffer->cur;
1086 if (c == '*')
1088 if (_cpp_skip_block_comment (pfile))
1089 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1091 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1092 || cpp_in_system_header (pfile)))
1094 /* Warn about comments only if pedantically GNUC89, and not
1095 in system headers. */
1096 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1097 && ! buffer->warned_cplusplus_comments)
1099 cpp_error (pfile, CPP_DL_PEDWARN,
1100 "C++ style comments are not allowed in ISO C90");
1101 cpp_error (pfile, CPP_DL_PEDWARN,
1102 "(this will be reported only once per input file)");
1103 buffer->warned_cplusplus_comments = 1;
1106 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1107 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1109 else if (c == '=')
1111 buffer->cur++;
1112 result->type = CPP_DIV_EQ;
1113 break;
1115 else
1117 result->type = CPP_DIV;
1118 break;
1121 if (!pfile->state.save_comments)
1123 result->flags |= PREV_WHITE;
1124 goto update_tokens_line;
1127 /* Save the comment as a token in its own right. */
1128 save_comment (pfile, result, comment_start, c);
1129 break;
1131 case '<':
1132 if (pfile->state.angled_headers)
1134 lex_string (pfile, result, buffer->cur - 1);
1135 break;
1138 result->type = CPP_LESS;
1139 if (*buffer->cur == '=')
1140 buffer->cur++, result->type = CPP_LESS_EQ;
1141 else if (*buffer->cur == '<')
1143 buffer->cur++;
1144 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1146 else if (CPP_OPTION (pfile, digraphs))
1148 if (*buffer->cur == ':')
1150 buffer->cur++;
1151 result->flags |= DIGRAPH;
1152 result->type = CPP_OPEN_SQUARE;
1154 else if (*buffer->cur == '%')
1156 buffer->cur++;
1157 result->flags |= DIGRAPH;
1158 result->type = CPP_OPEN_BRACE;
1161 break;
1163 case '>':
1164 result->type = CPP_GREATER;
1165 if (*buffer->cur == '=')
1166 buffer->cur++, result->type = CPP_GREATER_EQ;
1167 else if (*buffer->cur == '>')
1169 buffer->cur++;
1170 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1172 break;
1174 case '%':
1175 result->type = CPP_MOD;
1176 if (*buffer->cur == '=')
1177 buffer->cur++, result->type = CPP_MOD_EQ;
1178 else if (CPP_OPTION (pfile, digraphs))
1180 if (*buffer->cur == ':')
1182 buffer->cur++;
1183 result->flags |= DIGRAPH;
1184 result->type = CPP_HASH;
1185 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1186 buffer->cur += 2, result->type = CPP_PASTE;
1188 else if (*buffer->cur == '>')
1190 buffer->cur++;
1191 result->flags |= DIGRAPH;
1192 result->type = CPP_CLOSE_BRACE;
1195 break;
1197 case '.':
1198 result->type = CPP_DOT;
1199 if (ISDIGIT (*buffer->cur))
1201 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1202 result->type = CPP_NUMBER;
1203 lex_number (pfile, &result->val.str, &nst);
1204 warn_about_normalization (pfile, result, &nst);
1206 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1207 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1208 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1209 buffer->cur++, result->type = CPP_DOT_STAR;
1210 break;
1212 case '+':
1213 result->type = CPP_PLUS;
1214 if (*buffer->cur == '+')
1215 buffer->cur++, result->type = CPP_PLUS_PLUS;
1216 else if (*buffer->cur == '=')
1217 buffer->cur++, result->type = CPP_PLUS_EQ;
1218 break;
1220 case '-':
1221 result->type = CPP_MINUS;
1222 if (*buffer->cur == '>')
1224 buffer->cur++;
1225 result->type = CPP_DEREF;
1226 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1227 buffer->cur++, result->type = CPP_DEREF_STAR;
1229 else if (*buffer->cur == '-')
1230 buffer->cur++, result->type = CPP_MINUS_MINUS;
1231 else if (*buffer->cur == '=')
1232 buffer->cur++, result->type = CPP_MINUS_EQ;
1233 break;
1235 case '&':
1236 result->type = CPP_AND;
1237 if (*buffer->cur == '&')
1238 buffer->cur++, result->type = CPP_AND_AND;
1239 else if (*buffer->cur == '=')
1240 buffer->cur++, result->type = CPP_AND_EQ;
1241 break;
1243 case '|':
1244 result->type = CPP_OR;
1245 if (*buffer->cur == '|')
1246 buffer->cur++, result->type = CPP_OR_OR;
1247 else if (*buffer->cur == '=')
1248 buffer->cur++, result->type = CPP_OR_EQ;
1249 break;
1251 case ':':
1252 result->type = CPP_COLON;
1253 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1254 buffer->cur++, result->type = CPP_SCOPE;
1255 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1257 buffer->cur++;
1258 result->flags |= DIGRAPH;
1259 result->type = CPP_CLOSE_SQUARE;
1261 break;
1263 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1264 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1265 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1266 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1267 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1269 case '?': result->type = CPP_QUERY; break;
1270 case '~': result->type = CPP_COMPL; break;
1271 case ',': result->type = CPP_COMMA; break;
1272 case '(': result->type = CPP_OPEN_PAREN; break;
1273 case ')': result->type = CPP_CLOSE_PAREN; break;
1274 case '[': result->type = CPP_OPEN_SQUARE; break;
1275 case ']': result->type = CPP_CLOSE_SQUARE; break;
1276 case '{': result->type = CPP_OPEN_BRACE; break;
1277 case '}': result->type = CPP_CLOSE_BRACE; break;
1278 case ';': result->type = CPP_SEMICOLON; break;
1280 /* @ is a punctuator in Objective-C. */
1281 case '@': result->type = CPP_ATSIGN; break;
1283 case '$':
1284 case '\\':
1286 const uchar *base = --buffer->cur;
1287 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1289 if (forms_identifier_p (pfile, true, &nst))
1291 result->type = CPP_NAME;
1292 result->val.node = lex_identifier (pfile, base, true, &nst);
1293 warn_about_normalization (pfile, result, &nst);
1294 break;
1296 buffer->cur++;
1299 default:
1300 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1301 break;
1304 return result;
1307 /* An upper bound on the number of bytes needed to spell TOKEN.
1308 Does not include preceding whitespace. */
1309 unsigned int
1310 cpp_token_len (const cpp_token *token)
1312 unsigned int len;
1314 switch (TOKEN_SPELL (token))
1316 default: len = 4; break;
1317 case SPELL_LITERAL: len = token->val.str.len; break;
1318 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1321 return len;
/* Decode the UTF-8 sequence that starts at NAME and store the
   equivalent \UXXXXXXXX escape (lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written to BUFFER and no terminating NUL is
   added.  Returns the number of bytes of NAME that made up the
   sequence.  Calls abort () if a continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The number of leading one bits in the first byte is the length
     of the whole sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* Payload bits of the lead byte, then six bits from each
     continuation byte.  */
  code_point = *name & (0x7F >> seq_len);
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* A continuation byte must look like 10xxxxxx.  */
      if ((*name & 0xC0) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1359 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1360 already contain the enough space to hold the token's spelling.
1361 Returns a pointer to the character after the last character written.
1362 FORSTRING is true if this is to be the spelling after translation
1363 phase 1 (this is different for UCNs).
1364 FIXME: Would be nice if we didn't need the PFILE argument. */
1365 unsigned char *
1366 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1367 unsigned char *buffer, bool forstring)
1369 switch (TOKEN_SPELL (token))
1371 case SPELL_OPERATOR:
1373 const unsigned char *spelling;
1374 unsigned char c;
1376 if (token->flags & DIGRAPH)
1377 spelling
1378 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1379 else if (token->flags & NAMED_OP)
1380 goto spell_ident;
1381 else
1382 spelling = TOKEN_NAME (token);
1384 while ((c = *spelling++) != '\0')
1385 *buffer++ = c;
1387 break;
1389 spell_ident:
1390 case SPELL_IDENT:
1391 if (forstring)
1393 memcpy (buffer, NODE_NAME (token->val.node),
1394 NODE_LEN (token->val.node));
1395 buffer += NODE_LEN (token->val.node);
1397 else
1399 size_t i;
1400 const unsigned char * name = NODE_NAME (token->val.node);
1402 for (i = 0; i < NODE_LEN (token->val.node); i++)
1403 if (name[i] & ~0x7F)
1405 i += utf8_to_ucn (buffer, name + i) - 1;
1406 buffer += 10;
1408 else
1409 *buffer++ = NODE_NAME (token->val.node)[i];
1411 break;
1413 case SPELL_LITERAL:
1414 memcpy (buffer, token->val.str.text, token->val.str.len);
1415 buffer += token->val.str.len;
1416 break;
1418 case SPELL_NONE:
1419 cpp_error (pfile, CPP_DL_ICE,
1420 "unspellable token %s", TOKEN_NAME (token));
1421 break;
1424 return buffer;
1427 /* Returns TOKEN spelt as a null-terminated string. The string is
1428 freed when the reader is destroyed. Useful for diagnostics. */
1429 unsigned char *
1430 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1432 unsigned int len = cpp_token_len (token) + 1;
1433 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1435 end = cpp_spell_token (pfile, token, start, false);
1436 end[0] = '\0';
1438 return start;
1441 /* Used by C front ends, which really should move to using
1442 cpp_token_as_text. */
1443 const char *
1444 cpp_type2name (enum cpp_ttype type)
1446 return (const char *) token_spellings[type].name;
1449 /* Writes the spelling of token to FP, without any preceding space.
1450 Separated from cpp_spell_token for efficiency - to avoid stdio
1451 double-buffering. */
1452 void
1453 cpp_output_token (const cpp_token *token, FILE *fp)
1455 switch (TOKEN_SPELL (token))
1457 case SPELL_OPERATOR:
1459 const unsigned char *spelling;
1460 int c;
1462 if (token->flags & DIGRAPH)
1463 spelling
1464 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1465 else if (token->flags & NAMED_OP)
1466 goto spell_ident;
1467 else
1468 spelling = TOKEN_NAME (token);
1470 c = *spelling;
1472 putc (c, fp);
1473 while ((c = *++spelling) != '\0');
1475 break;
1477 spell_ident:
1478 case SPELL_IDENT:
1480 size_t i;
1481 const unsigned char * name = NODE_NAME (token->val.node);
1483 for (i = 0; i < NODE_LEN (token->val.node); i++)
1484 if (name[i] & ~0x7F)
1486 unsigned char buffer[10];
1487 i += utf8_to_ucn (buffer, name + i) - 1;
1488 fwrite (buffer, 1, 10, fp);
1490 else
1491 fputc (NODE_NAME (token->val.node)[i], fp);
1493 break;
1495 case SPELL_LITERAL:
1496 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1497 break;
1499 case SPELL_NONE:
1500 /* An error, most probably. */
1501 break;
1505 /* Compare two tokens. */
1507 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1509 if (a->type == b->type && a->flags == b->flags)
1510 switch (TOKEN_SPELL (a))
1512 default: /* Keep compiler happy. */
1513 case SPELL_OPERATOR:
1514 return 1;
1515 case SPELL_NONE:
1516 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1517 case SPELL_IDENT:
1518 return a->val.node == b->val.node;
1519 case SPELL_LITERAL:
1520 return (a->val.str.len == b->val.str.len
1521 && !memcmp (a->val.str.text, b->val.str.text,
1522 a->val.str.len));
1525 return 0;
1528 /* Returns nonzero if a space should be inserted to avoid an
1529 accidental token paste for output. For simplicity, it is
1530 conservative, and occasionally advises a space where one is not
1531 needed, e.g. "." and ".2". */
1533 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1534 const cpp_token *token2)
1536 enum cpp_ttype a = token1->type, b = token2->type;
1537 cppchar_t c;
1539 if (token1->flags & NAMED_OP)
1540 a = CPP_NAME;
1541 if (token2->flags & NAMED_OP)
1542 b = CPP_NAME;
1544 c = EOF;
1545 if (token2->flags & DIGRAPH)
1546 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1547 else if (token_spellings[b].category == SPELL_OPERATOR)
1548 c = token_spellings[b].name[0];
1550 /* Quickly get everything that can paste with an '='. */
1551 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1552 return 1;
1554 switch (a)
1556 case CPP_GREATER: return c == '>';
1557 case CPP_LESS: return c == '<' || c == '%' || c == ':';
1558 case CPP_PLUS: return c == '+';
1559 case CPP_MINUS: return c == '-' || c == '>';
1560 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1561 case CPP_MOD: return c == ':' || c == '>';
1562 case CPP_AND: return c == '&';
1563 case CPP_OR: return c == '|';
1564 case CPP_COLON: return c == ':' || c == '>';
1565 case CPP_DEREF: return c == '*';
1566 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1567 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1568 case CPP_NAME: return ((b == CPP_NUMBER
1569 && name_p (pfile, &token2->val.str))
1570 || b == CPP_NAME
1571 || b == CPP_CHAR || b == CPP_STRING); /* L */
1572 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1573 || c == '.' || c == '+' || c == '-');
1574 /* UCNs */
1575 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1576 && b == CPP_NAME)
1577 || (CPP_OPTION (pfile, objc)
1578 && token1->val.str.text[0] == '@'
1579 && (b == CPP_NAME || b == CPP_STRING)));
1580 default: break;
1583 return 0;
1586 /* Output all the remaining tokens on the current line, and a newline
1587 character, to FP. Leading whitespace is removed. If there are
1588 macros, special token padding is not performed. */
1589 void
1590 cpp_output_line (cpp_reader *pfile, FILE *fp)
1592 const cpp_token *token;
1594 token = cpp_get_token (pfile);
1595 while (token->type != CPP_EOF)
1597 cpp_output_token (token, fp);
1598 token = cpp_get_token (pfile);
1599 if (token->flags & PREV_WHITE)
1600 putc (' ', fp);
1603 putc ('\n', fp);
1606 /* Return a string representation of all the remaining tokens on the
1607 current line. The result is allocated using xmalloc and must be
1608 freed by the caller. */
1609 unsigned char *
1610 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1612 const cpp_token *token;
1613 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1614 unsigned int alloced = 120 + out;
1615 unsigned char *result = (unsigned char *) xmalloc (alloced);
1617 /* If DIR_NAME is empty, there are no initial contents. */
1618 if (dir_name)
1620 sprintf ((char *) result, "#%s ", dir_name);
1621 out += 2;
1624 token = cpp_get_token (pfile);
1625 while (token->type != CPP_EOF)
1627 unsigned char *last;
1628 /* Include room for a possible space and the terminating nul. */
1629 unsigned int len = cpp_token_len (token) + 2;
1631 if (out + len > alloced)
1633 alloced *= 2;
1634 if (out + len > alloced)
1635 alloced = out + len;
1636 result = (unsigned char *) xrealloc (result, alloced);
1639 last = cpp_spell_token (pfile, token, &result[out], 0);
1640 out = last - result;
1642 token = cpp_get_token (pfile);
1643 if (token->flags & PREV_WHITE)
1644 result[out++] = ' ';
1647 result[out] = '\0';
1648 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the bytes left between
   BUFF->cur and BUFF->limit.  Every macro argument is parenthesized
   so that any expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1666 /* Create a new allocation buffer. Place the control block at the end
1667 of the buffer, so that buffer overflows will cause immediate chaos. */
1668 static _cpp_buff *
1669 new_buff (size_t len)
1671 _cpp_buff *result;
1672 unsigned char *base;
1674 if (len < MIN_BUFF_SIZE)
1675 len = MIN_BUFF_SIZE;
1676 len = CPP_ALIGN (len);
1678 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1679 result = (_cpp_buff *) (base + len);
1680 result->base = base;
1681 result->cur = base;
1682 result->limit = base + len;
1683 result->next = NULL;
1684 return result;
1687 /* Place a chain of unwanted allocation buffers on the free list. */
1688 void
1689 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1691 _cpp_buff *end = buff;
1693 while (end->next)
1694 end = end->next;
1695 end->next = pfile->free_buffs;
1696 pfile->free_buffs = buff;
1699 /* Return a free buffer of size at least MIN_SIZE. */
1700 _cpp_buff *
1701 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1703 _cpp_buff *result, **p;
1705 for (p = &pfile->free_buffs;; p = &(*p)->next)
1707 size_t size;
1709 if (*p == NULL)
1710 return new_buff (min_size);
1711 result = *p;
1712 size = result->limit - result->base;
1713 /* Return a buffer that's big enough, but don't waste one that's
1714 way too big. */
1715 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1716 break;
1719 *p = result->next;
1720 result->next = NULL;
1721 result->cur = result->base;
1722 return result;
1725 /* Creates a new buffer with enough space to hold the uncommitted
1726 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1727 the excess bytes to the new buffer. Chains the new buffer after
1728 BUFF, and returns the new buffer. */
1729 _cpp_buff *
1730 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1732 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1733 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1735 buff->next = new_buff;
1736 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1737 return new_buff;
1740 /* Creates a new buffer with enough space to hold the uncommitted
1741 remaining bytes of the buffer pointed to by BUFF, and at least
1742 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1743 Chains the new buffer before the buffer pointed to by BUFF, and
1744 updates the pointer to point to the new buffer. */
1745 void
1746 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1748 _cpp_buff *new_buff, *old_buff = *pbuff;
1749 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1751 new_buff = _cpp_get_buff (pfile, size);
1752 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1753 new_buff->next = old_buff;
1754 *pbuff = new_buff;
1757 /* Free a chain of buffers starting at BUFF. */
1758 void
1759 _cpp_free_buff (_cpp_buff *buff)
1761 _cpp_buff *next;
1763 for (; buff; buff = next)
1765 next = buff->next;
1766 free (buff->base);
1770 /* Allocate permanent, unaligned storage of length LEN. */
1771 unsigned char *
1772 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1774 _cpp_buff *buff = pfile->u_buff;
1775 unsigned char *result = buff->cur;
1777 if (len > (size_t) (buff->limit - result))
1779 buff = _cpp_get_buff (pfile, len);
1780 buff->next = pfile->u_buff;
1781 pfile->u_buff = buff;
1782 result = buff->cur;
1785 buff->cur = result + len;
1786 return result;
1789 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1790 That buffer is used for growing allocations when saving macro
1791 replacement lists in a #define, and when parsing an answer to an
1792 assertion in #assert, #unassert or #if (and therefore possibly
1793 whilst expanding macros). It therefore must not be used by any
1794 code that they might call: specifically the lexer and the guts of
1795 the macro expander.
1797 All existing other uses clearly fit this restriction: storing
1798 registered pragmas during initialization. */
1799 unsigned char *
1800 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1802 _cpp_buff *buff = pfile->a_buff;
1803 unsigned char *result = buff->cur;
1805 if (len > (size_t) (buff->limit - result))
1807 buff = _cpp_get_buff (pfile, len);
1808 buff->next = pfile->a_buff;
1809 pfile->a_buff = buff;
1810 result = buff->cur;
1813 buff->cur = result + len;
1814 return result;
1817 /* Say which field of TOK is in use. */
1819 enum cpp_token_fld_kind
1820 cpp_token_val_index (cpp_token *tok)
1822 switch (TOKEN_SPELL (tok))
1824 case SPELL_IDENT:
1825 return CPP_TOKEN_FLD_NODE;
1826 case SPELL_LITERAL:
1827 return CPP_TOKEN_FLD_STR;
1828 case SPELL_NONE:
1829 if (tok->type == CPP_MACRO_ARG)
1830 return CPP_TOKEN_FLD_ARG_NO;
1831 else if (tok->type == CPP_PADDING)
1832 return CPP_TOKEN_FLD_SOURCE;
1833 else if (tok->type == CPP_PRAGMA)
1834 return CPP_TOKEN_FLD_PRAGMA;
1835 /* else fall through */
1836 default:
1837 return CPP_TOKEN_FLD_NONE;