Daily bump.
[official-gcc.git] / libcpp / lex.c
blob8398c7ca06122480e4718fbc246080987e3faa77
/* CPP Library - lexical analysis.
   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22 #include "config.h"
23 #include "system.h"
24 #include "cpplib.h"
25 #include "internal.h"
/* How the spelling of a token type is obtained.  SPELL_OPERATOR is
   pinned to zero; the other enumerators follow consecutively.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
35 struct token_spelling
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { SPELL_ ## s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47 #undef OP
48 #undef TK
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool);
57 static void lex_number (cpp_reader *, cpp_string *);
58 static bool forms_identifier_p (cpp_reader *, int);
59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
70 /* Utility routine:
72 Compares, the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
74 int
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
78 return 0;
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical
84 line. */
85 static void
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
97 buffer->notes_used++;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
102 void
103 _cpp_clean_line (cpp_reader *pfile)
105 cpp_buffer *buffer;
106 const uchar *s;
107 uchar c, *d, *p;
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
115 if (!buffer->from_stage3)
117 /* Short circuit for the common case of an un-escaped line with
118 no trigraphs. The primary win here is by not writing any
119 data back to memory until we have to. */
120 for (;;)
122 c = *++s;
123 if (c == '\n' || c == '\r')
125 d = (uchar *) s;
127 if (s == buffer->rlimit)
128 goto done;
130 /* DOS line ending? */
131 if (c == '\r' && s[1] == '\n')
132 s++;
134 if (s == buffer->rlimit)
135 goto done;
137 /* check for escaped newline */
138 p = d;
139 while (p != buffer->next_line && is_nvspace (p[-1]))
140 p--;
141 if (p == buffer->next_line || p[-1] != '\\')
142 goto done;
144 /* Have an escaped newline; process it and proceed to
145 the slow path. */
146 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
147 d = p - 2;
148 buffer->next_line = p - 1;
149 break;
151 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
153 /* Have a trigraph. We may or may not have to convert
154 it. Add a line note regardless, for -Wtrigraphs. */
155 add_line_note (buffer, s, s[2]);
156 if (CPP_OPTION (pfile, trigraphs))
158 /* We do, and that means we have to switch to the
159 slow path. */
160 d = (uchar *) s;
161 *d = _cpp_trigraph_map[s[2]];
162 s += 2;
163 break;
169 for (;;)
171 c = *++s;
172 *++d = c;
174 if (c == '\n' || c == '\r')
176 /* Handle DOS line endings. */
177 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
178 s++;
179 if (s == buffer->rlimit)
180 break;
182 /* Escaped? */
183 p = d;
184 while (p != buffer->next_line && is_nvspace (p[-1]))
185 p--;
186 if (p == buffer->next_line || p[-1] != '\\')
187 break;
189 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
190 d = p - 2;
191 buffer->next_line = p - 1;
193 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
195 /* Add a note regardless, for the benefit of -Wtrigraphs. */
196 add_line_note (buffer, d, s[2]);
197 if (CPP_OPTION (pfile, trigraphs))
199 *d = _cpp_trigraph_map[s[2]];
200 s += 2;
205 else
208 s++;
209 while (*s != '\n' && *s != '\r');
210 d = (uchar *) s;
212 /* Handle DOS line endings. */
213 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
214 s++;
217 done:
218 *d = '\n';
219 /* A sentinel note that should never be processed. */
220 add_line_note (buffer, d + 1, '\n');
221 buffer->next_line = s + 1;
224 /* Return true if the trigraph indicated by NOTE should be warned
225 about in a comment. */
226 static bool
227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
229 const uchar *p;
231 /* Within comments we don't warn about trigraphs, unless the
232 trigraph forms an escaped newline, as that may change
233 behavior. */
234 if (note->type != '/')
235 return false;
237 /* If -trigraphs, then this was an escaped newline iff the next note
238 is coincident. */
239 if (CPP_OPTION (pfile, trigraphs))
240 return note[1].pos == note->pos;
242 /* Otherwise, see if this forms an escaped newline. */
243 p = note->pos + 3;
244 while (is_nvspace (*p))
245 p++;
247 /* There might have been escaped newlines between the trigraph and the
248 newline we found. Hence the position test. */
249 return (*p == '\n' && p < note[1].pos);
252 /* Process the notes created by add_line_note as far as the current
253 location. */
254 void
255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
257 cpp_buffer *buffer = pfile->buffer;
259 for (;;)
261 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
262 unsigned int col;
264 if (note->pos > buffer->cur)
265 break;
267 buffer->cur_note++;
268 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
270 if (note->type == '\\' || note->type == ' ')
272 if (note->type == ' ' && !in_comment)
273 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
274 "backslash and newline separated by space");
276 if (buffer->next_line > buffer->rlimit)
278 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
279 "backslash-newline at end of file");
280 /* Prevent "no newline at end of file" warning. */
281 buffer->next_line = buffer->rlimit;
284 buffer->line_base = note->pos;
285 CPP_INCREMENT_LINE (pfile, 0);
287 else if (_cpp_trigraph_map[note->type])
289 if (CPP_OPTION (pfile, warn_trigraphs)
290 && (!in_comment || warn_in_comment (pfile, note)))
292 if (CPP_OPTION (pfile, trigraphs))
293 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
294 "trigraph ??%c converted to %c",
295 note->type,
296 (int) _cpp_trigraph_map[note->type]);
297 else
299 cpp_error_with_line
300 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
301 "trigraph ??%c ignored, use -trigraphs to enable",
302 note->type);
306 else
307 abort ();
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 nonzero if comment terminated by EOF, zero otherwise.
315 Buffer->cur points to the initial asterisk of the comment. */
316 bool
317 _cpp_skip_block_comment (cpp_reader *pfile)
319 cpp_buffer *buffer = pfile->buffer;
320 const uchar *cur = buffer->cur;
321 uchar c;
323 cur++;
324 if (*cur == '/')
325 cur++;
327 for (;;)
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
331 c = *cur++;
333 if (c == '/')
335 if (cur[-2] == '*')
336 break;
338 /* Warn about potential nested comments, but not if the '/'
339 comes immediately before the true comment delimiter.
340 Don't bother to get it right across escaped newlines. */
341 if (CPP_OPTION (pfile, warn_comments)
342 && cur[0] == '*' && cur[1] != '/')
344 buffer->cur = cur;
345 cpp_error_with_line (pfile, CPP_DL_WARNING,
346 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
347 "\"/*\" within comment");
350 else if (c == '\n')
352 unsigned int cols;
353 buffer->cur = cur - 1;
354 _cpp_process_line_notes (pfile, true);
355 if (buffer->next_line >= buffer->rlimit)
356 return true;
357 _cpp_clean_line (pfile);
359 cols = buffer->next_line - buffer->line_base;
360 CPP_INCREMENT_LINE (pfile, cols);
362 cur = buffer->cur;
366 buffer->cur = cur;
367 _cpp_process_line_notes (pfile, true);
368 return false;
371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
372 terminating newline. Handles escaped newlines. Returns nonzero
373 if a multiline comment. */
374 static int
375 skip_line_comment (cpp_reader *pfile)
377 cpp_buffer *buffer = pfile->buffer;
378 unsigned int orig_line = pfile->line_table->highest_line;
380 while (*buffer->cur != '\n')
381 buffer->cur++;
383 _cpp_process_line_notes (pfile, true);
384 return orig_line != pfile->line_table->highest_line;
387 /* Skips whitespace, saving the next non-whitespace character. */
388 static void
389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
391 cpp_buffer *buffer = pfile->buffer;
392 bool saw_NUL = false;
396 /* Horizontal space always OK. */
397 if (c == ' ' || c == '\t')
399 /* Just \f \v or \0 left. */
400 else if (c == '\0')
401 saw_NUL = true;
402 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
403 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
404 CPP_BUF_COL (buffer),
405 "%s in preprocessing directive",
406 c == '\f' ? "form feed" : "vertical tab");
408 c = *buffer->cur++;
410 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
411 while (is_nvspace (c));
413 if (saw_NUL)
414 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
416 buffer->cur--;
419 /* See if the characters of a number token are valid in a name (no
420 '.', '+' or '-'). */
421 static int
422 name_p (cpp_reader *pfile, const cpp_string *string)
424 unsigned int i;
426 for (i = 0; i < string->len; i++)
427 if (!is_idchar (string->text[i]))
428 return 0;
430 return 1;
433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
434 an identifier. FIRST is TRUE if this starts an identifier. */
435 static bool
436 forms_identifier_p (cpp_reader *pfile, int first)
438 cpp_buffer *buffer = pfile->buffer;
440 if (*buffer->cur == '$')
442 if (!CPP_OPTION (pfile, dollars_in_ident))
443 return false;
445 buffer->cur++;
446 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
448 CPP_OPTION (pfile, warn_dollars) = 0;
449 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
452 return true;
455 /* Is this a syntactically valid UCN? */
456 if (*buffer->cur == '\\'
457 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
459 buffer->cur += 2;
460 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
461 return true;
462 buffer->cur -= 2;
465 return false;
468 /* Lex an identifier starting at BUFFER->CUR - 1. */
469 static cpp_hashnode *
470 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn)
472 cpp_hashnode *result;
473 const uchar *cur;
474 unsigned int len;
475 unsigned int hash = HT_HASHSTEP (0, *base);
477 cur = pfile->buffer->cur;
478 if (! starts_ucn)
479 while (ISIDNUM (*cur))
481 hash = HT_HASHSTEP (hash, *cur);
482 cur++;
484 pfile->buffer->cur = cur;
485 if (starts_ucn || forms_identifier_p (pfile, false))
487 /* Slower version for identifiers containing UCNs (or $). */
488 do {
489 while (ISIDNUM (*pfile->buffer->cur))
490 pfile->buffer->cur++;
491 } while (forms_identifier_p (pfile, false));
492 result = _cpp_interpret_identifier (pfile, base,
493 pfile->buffer->cur - base);
495 else
497 len = cur - base;
498 hash = HT_HASHFINISH (hash, len);
500 result = (cpp_hashnode *)
501 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
504 /* Rarely, identifiers require diagnostics when lexed. */
505 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
506 && !pfile->state.skipping, 0))
508 /* It is allowed to poison the same identifier twice. */
509 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
510 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
511 NODE_NAME (result));
513 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
514 replacement list of a variadic macro. */
515 if (result == pfile->spec_nodes.n__VA_ARGS__
516 && !pfile->state.va_args_ok)
517 cpp_error (pfile, CPP_DL_PEDWARN,
518 "__VA_ARGS__ can only appear in the expansion"
519 " of a C99 variadic macro");
522 return result;
525 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
526 static void
527 lex_number (cpp_reader *pfile, cpp_string *number)
529 const uchar *cur;
530 const uchar *base;
531 uchar *dest;
533 base = pfile->buffer->cur - 1;
536 cur = pfile->buffer->cur;
538 /* N.B. ISIDNUM does not include $. */
539 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
540 cur++;
542 pfile->buffer->cur = cur;
544 while (forms_identifier_p (pfile, false));
546 number->len = cur - base;
547 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
548 memcpy (dest, base, number->len);
549 dest[number->len] = '\0';
550 number->text = dest;
553 /* Create a token of type TYPE with a literal spelling. */
554 static void
555 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
556 unsigned int len, enum cpp_ttype type)
558 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
560 memcpy (dest, base, len);
561 dest[len] = '\0';
562 token->type = type;
563 token->val.str.len = len;
564 token->val.str.text = dest;
567 /* Lexes a string, character constant, or angle-bracketed header file
568 name. The stored string contains the spelling, including opening
569 quote and leading any leading 'L'. It returns the type of the
570 literal, or CPP_OTHER if it was not properly terminated.
572 The spelling is NUL-terminated, but it is not guaranteed that this
573 is the first NUL since embedded NULs are preserved. */
574 static void
575 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
577 bool saw_NUL = false;
578 const uchar *cur;
579 cppchar_t terminator;
580 enum cpp_ttype type;
582 cur = base;
583 terminator = *cur++;
584 if (terminator == 'L')
585 terminator = *cur++;
586 if (terminator == '\"')
587 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
588 else if (terminator == '\'')
589 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
590 else
591 terminator = '>', type = CPP_HEADER_NAME;
593 for (;;)
595 cppchar_t c = *cur++;
597 /* In #include-style directives, terminators are not escapable. */
598 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
599 cur++;
600 else if (c == terminator)
601 break;
602 else if (c == '\n')
604 cur--;
605 type = CPP_OTHER;
606 break;
608 else if (c == '\0')
609 saw_NUL = true;
612 if (saw_NUL && !pfile->state.skipping)
613 cpp_error (pfile, CPP_DL_WARNING,
614 "null character(s) preserved in literal");
616 pfile->buffer->cur = cur;
617 create_literal (pfile, token, base, cur - base, type);
620 /* The stored comment includes the comment start and any terminator. */
621 static void
622 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
623 cppchar_t type)
625 unsigned char *buffer;
626 unsigned int len, clen;
628 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
630 /* C++ comments probably (not definitely) have moved past a new
631 line, which we don't want to save in the comment. */
632 if (is_vspace (pfile->buffer->cur[-1]))
633 len--;
635 /* If we are currently in a directive, then we need to store all
636 C++ comments as C comments internally, and so we need to
637 allocate a little extra space in that case.
639 Note that the only time we encounter a directive here is
640 when we are saving comments in a "#define". */
641 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
643 buffer = _cpp_unaligned_alloc (pfile, clen);
645 token->type = CPP_COMMENT;
646 token->val.str.len = clen;
647 token->val.str.text = buffer;
649 buffer[0] = '/';
650 memcpy (buffer + 1, from, len - 1);
652 /* Finish conversion to a C comment, if necessary. */
653 if (pfile->state.in_directive && type == '/')
655 buffer[1] = '*';
656 buffer[clen - 2] = '*';
657 buffer[clen - 1] = '/';
661 /* Allocate COUNT tokens for RUN. */
662 void
663 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
665 run->base = XNEWVEC (cpp_token, count);
666 run->limit = run->base + count;
667 run->next = NULL;
670 /* Returns the next tokenrun, or creates one if there is none. */
671 static tokenrun *
672 next_tokenrun (tokenrun *run)
674 if (run->next == NULL)
676 run->next = XNEW (tokenrun);
677 run->next->prev = run;
678 _cpp_init_tokenrun (run->next, 250);
681 return run->next;
684 /* Allocate a single token that is invalidated at the same time as the
685 rest of the tokens on the line. Has its line and col set to the
686 same as the last lexed token, so that diagnostics appear in the
687 right place. */
688 cpp_token *
689 _cpp_temp_token (cpp_reader *pfile)
691 cpp_token *old, *result;
693 old = pfile->cur_token - 1;
694 if (pfile->cur_token == pfile->cur_run->limit)
696 pfile->cur_run = next_tokenrun (pfile->cur_run);
697 pfile->cur_token = pfile->cur_run->base;
700 result = pfile->cur_token++;
701 result->src_loc = old->src_loc;
702 return result;
705 /* Lex a token into RESULT (external interface). Takes care of issues
706 like directive handling, token lookahead, multiple include
707 optimization and skipping. */
708 const cpp_token *
709 _cpp_lex_token (cpp_reader *pfile)
711 cpp_token *result;
713 for (;;)
715 if (pfile->cur_token == pfile->cur_run->limit)
717 pfile->cur_run = next_tokenrun (pfile->cur_run);
718 pfile->cur_token = pfile->cur_run->base;
721 if (pfile->lookaheads)
723 pfile->lookaheads--;
724 result = pfile->cur_token++;
726 else
727 result = _cpp_lex_direct (pfile);
729 if (result->flags & BOL)
731 /* Is this a directive. If _cpp_handle_directive returns
732 false, it is an assembler #. */
733 if (result->type == CPP_HASH
734 /* 6.10.3 p 11: Directives in a list of macro arguments
735 gives undefined behavior. This implementation
736 handles the directive as normal. */
737 && pfile->state.parsing_args != 1
738 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
740 if (pfile->directive_result.type == CPP_PADDING)
741 continue;
742 else
744 result = &pfile->directive_result;
745 break;
749 if (pfile->cb.line_change && !pfile->state.skipping)
750 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
753 /* We don't skip tokens in directives. */
754 if (pfile->state.in_directive)
755 break;
757 /* Outside a directive, invalidate controlling macros. At file
758 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
759 get here and MI optimization works. */
760 pfile->mi_valid = false;
762 if (!pfile->state.skipping || result->type == CPP_EOF)
763 break;
766 return result;
769 /* Returns true if a fresh line has been loaded. */
770 bool
771 _cpp_get_fresh_line (cpp_reader *pfile)
773 int return_at_eof;
775 /* We can't get a new line until we leave the current directive. */
776 if (pfile->state.in_directive)
777 return false;
779 for (;;)
781 cpp_buffer *buffer = pfile->buffer;
783 if (!buffer->need_line)
784 return true;
786 if (buffer->next_line < buffer->rlimit)
788 _cpp_clean_line (pfile);
789 return true;
792 /* First, get out of parsing arguments state. */
793 if (pfile->state.parsing_args)
794 return false;
796 /* End of buffer. Non-empty files should end in a newline. */
797 if (buffer->buf != buffer->rlimit
798 && buffer->next_line > buffer->rlimit
799 && !buffer->from_stage3)
801 /* Only warn once. */
802 buffer->next_line = buffer->rlimit;
803 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
804 CPP_BUF_COLUMN (buffer, buffer->cur),
805 "no newline at end of file");
808 return_at_eof = buffer->return_at_eof;
809 _cpp_pop_buffer (pfile);
810 if (pfile->buffer == NULL || return_at_eof)
811 return false;
/* Set result->type to THEN_TYPE and consume the next character if it
   is CHAR; otherwise set result->type to ELSE_TYPE.  Relies on
   variables named `result' and `buffer' being in scope at the point
   of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
824 /* Lex a token into pfile->cur_token, which is also incremented, to
825 get diagnostics pointing to the correct location.
827 Does not handle issues such as token lookahead, multiple-include
828 optimization, directives, skipping etc. This function is only
829 suitable for use by _cpp_lex_token, and in special cases like
830 lex_expansion_token which doesn't care for any of these issues.
832 When meeting a newline, returns CPP_EOF if parsing a directive,
833 otherwise returns to the start of the token buffer if permissible.
834 Returns the location of the lexed token. */
835 cpp_token *
836 _cpp_lex_direct (cpp_reader *pfile)
838 cppchar_t c;
839 cpp_buffer *buffer;
840 const unsigned char *comment_start;
841 cpp_token *result = pfile->cur_token++;
843 fresh_line:
844 result->flags = 0;
845 buffer = pfile->buffer;
846 if (buffer->need_line)
848 if (!_cpp_get_fresh_line (pfile))
850 result->type = CPP_EOF;
851 if (!pfile->state.in_directive)
853 /* Tell the compiler the line number of the EOF token. */
854 result->src_loc = pfile->line_table->highest_line;
855 result->flags = BOL;
857 return result;
859 if (!pfile->keep_tokens)
861 pfile->cur_run = &pfile->base_run;
862 result = pfile->base_run.base;
863 pfile->cur_token = result + 1;
865 result->flags = BOL;
866 if (pfile->state.parsing_args == 2)
867 result->flags |= PREV_WHITE;
869 buffer = pfile->buffer;
870 update_tokens_line:
871 result->src_loc = pfile->line_table->highest_line;
873 skipped_white:
874 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
875 && !pfile->overlaid_buffer)
877 _cpp_process_line_notes (pfile, false);
878 result->src_loc = pfile->line_table->highest_line;
880 c = *buffer->cur++;
882 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
883 CPP_BUF_COLUMN (buffer, buffer->cur));
885 switch (c)
887 case ' ': case '\t': case '\f': case '\v': case '\0':
888 result->flags |= PREV_WHITE;
889 skip_whitespace (pfile, c);
890 goto skipped_white;
892 case '\n':
893 if (buffer->cur < buffer->rlimit)
894 CPP_INCREMENT_LINE (pfile, 0);
895 buffer->need_line = true;
896 goto fresh_line;
898 case '0': case '1': case '2': case '3': case '4':
899 case '5': case '6': case '7': case '8': case '9':
900 result->type = CPP_NUMBER;
901 lex_number (pfile, &result->val.str);
902 break;
904 case 'L':
905 /* 'L' may introduce wide characters or strings. */
906 if (*buffer->cur == '\'' || *buffer->cur == '"')
908 lex_string (pfile, result, buffer->cur - 1);
909 break;
911 /* Fall through. */
913 case '_':
914 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
915 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
916 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
917 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
918 case 'y': case 'z':
919 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
920 case 'G': case 'H': case 'I': case 'J': case 'K':
921 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
922 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
923 case 'Y': case 'Z':
924 result->type = CPP_NAME;
925 result->val.node = lex_identifier (pfile, buffer->cur - 1, false);
927 /* Convert named operators to their proper types. */
928 if (result->val.node->flags & NODE_OPERATOR)
930 result->flags |= NAMED_OP;
931 result->type = result->val.node->directive_index;
933 break;
935 case '\'':
936 case '"':
937 lex_string (pfile, result, buffer->cur - 1);
938 break;
940 case '/':
941 /* A potential block or line comment. */
942 comment_start = buffer->cur;
943 c = *buffer->cur;
945 if (c == '*')
947 if (_cpp_skip_block_comment (pfile))
948 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
950 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
951 || cpp_in_system_header (pfile)))
953 /* Warn about comments only if pedantically GNUC89, and not
954 in system headers. */
955 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
956 && ! buffer->warned_cplusplus_comments)
958 cpp_error (pfile, CPP_DL_PEDWARN,
959 "C++ style comments are not allowed in ISO C90");
960 cpp_error (pfile, CPP_DL_PEDWARN,
961 "(this will be reported only once per input file)");
962 buffer->warned_cplusplus_comments = 1;
965 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
966 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
968 else if (c == '=')
970 buffer->cur++;
971 result->type = CPP_DIV_EQ;
972 break;
974 else
976 result->type = CPP_DIV;
977 break;
980 if (!pfile->state.save_comments)
982 result->flags |= PREV_WHITE;
983 goto update_tokens_line;
986 /* Save the comment as a token in its own right. */
987 save_comment (pfile, result, comment_start, c);
988 break;
990 case '<':
991 if (pfile->state.angled_headers)
993 lex_string (pfile, result, buffer->cur - 1);
994 break;
997 result->type = CPP_LESS;
998 if (*buffer->cur == '=')
999 buffer->cur++, result->type = CPP_LESS_EQ;
1000 else if (*buffer->cur == '<')
1002 buffer->cur++;
1003 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1005 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1007 buffer->cur++;
1008 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1010 else if (CPP_OPTION (pfile, digraphs))
1012 if (*buffer->cur == ':')
1014 buffer->cur++;
1015 result->flags |= DIGRAPH;
1016 result->type = CPP_OPEN_SQUARE;
1018 else if (*buffer->cur == '%')
1020 buffer->cur++;
1021 result->flags |= DIGRAPH;
1022 result->type = CPP_OPEN_BRACE;
1025 break;
1027 case '>':
1028 result->type = CPP_GREATER;
1029 if (*buffer->cur == '=')
1030 buffer->cur++, result->type = CPP_GREATER_EQ;
1031 else if (*buffer->cur == '>')
1033 buffer->cur++;
1034 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1036 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1038 buffer->cur++;
1039 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1041 break;
1043 case '%':
1044 result->type = CPP_MOD;
1045 if (*buffer->cur == '=')
1046 buffer->cur++, result->type = CPP_MOD_EQ;
1047 else if (CPP_OPTION (pfile, digraphs))
1049 if (*buffer->cur == ':')
1051 buffer->cur++;
1052 result->flags |= DIGRAPH;
1053 result->type = CPP_HASH;
1054 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1055 buffer->cur += 2, result->type = CPP_PASTE;
1057 else if (*buffer->cur == '>')
1059 buffer->cur++;
1060 result->flags |= DIGRAPH;
1061 result->type = CPP_CLOSE_BRACE;
1064 break;
1066 case '.':
1067 result->type = CPP_DOT;
1068 if (ISDIGIT (*buffer->cur))
1070 result->type = CPP_NUMBER;
1071 lex_number (pfile, &result->val.str);
1073 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1074 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1075 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1076 buffer->cur++, result->type = CPP_DOT_STAR;
1077 break;
1079 case '+':
1080 result->type = CPP_PLUS;
1081 if (*buffer->cur == '+')
1082 buffer->cur++, result->type = CPP_PLUS_PLUS;
1083 else if (*buffer->cur == '=')
1084 buffer->cur++, result->type = CPP_PLUS_EQ;
1085 break;
1087 case '-':
1088 result->type = CPP_MINUS;
1089 if (*buffer->cur == '>')
1091 buffer->cur++;
1092 result->type = CPP_DEREF;
1093 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1094 buffer->cur++, result->type = CPP_DEREF_STAR;
1096 else if (*buffer->cur == '-')
1097 buffer->cur++, result->type = CPP_MINUS_MINUS;
1098 else if (*buffer->cur == '=')
1099 buffer->cur++, result->type = CPP_MINUS_EQ;
1100 break;
1102 case '&':
1103 result->type = CPP_AND;
1104 if (*buffer->cur == '&')
1105 buffer->cur++, result->type = CPP_AND_AND;
1106 else if (*buffer->cur == '=')
1107 buffer->cur++, result->type = CPP_AND_EQ;
1108 break;
1110 case '|':
1111 result->type = CPP_OR;
1112 if (*buffer->cur == '|')
1113 buffer->cur++, result->type = CPP_OR_OR;
1114 else if (*buffer->cur == '=')
1115 buffer->cur++, result->type = CPP_OR_EQ;
1116 break;
1118 case ':':
1119 result->type = CPP_COLON;
1120 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1121 buffer->cur++, result->type = CPP_SCOPE;
1122 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1124 buffer->cur++;
1125 result->flags |= DIGRAPH;
1126 result->type = CPP_CLOSE_SQUARE;
1128 break;
1130 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1131 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1132 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1133 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1134 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1136 case '?': result->type = CPP_QUERY; break;
1137 case '~': result->type = CPP_COMPL; break;
1138 case ',': result->type = CPP_COMMA; break;
1139 case '(': result->type = CPP_OPEN_PAREN; break;
1140 case ')': result->type = CPP_CLOSE_PAREN; break;
1141 case '[': result->type = CPP_OPEN_SQUARE; break;
1142 case ']': result->type = CPP_CLOSE_SQUARE; break;
1143 case '{': result->type = CPP_OPEN_BRACE; break;
1144 case '}': result->type = CPP_CLOSE_BRACE; break;
1145 case ';': result->type = CPP_SEMICOLON; break;
1147 /* @ is a punctuator in Objective-C. */
1148 case '@': result->type = CPP_ATSIGN; break;
1150 case '$':
1151 case '\\':
1153 const uchar *base = --buffer->cur;
1155 if (forms_identifier_p (pfile, true))
1157 result->type = CPP_NAME;
1158 result->val.node = lex_identifier (pfile, base, true);
1159 break;
1161 buffer->cur++;
1164 default:
1165 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1166 break;
1169 return result;
1172 /* An upper bound on the number of bytes needed to spell TOKEN.
1173 Does not include preceding whitespace. */
1174 unsigned int
1175 cpp_token_len (const cpp_token *token)
1177 unsigned int len;
1179 switch (TOKEN_SPELL (token))
1181 default: len = 4; break;
1182 case SPELL_LITERAL: len = token->val.str.len; break;
1183 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
1186 return len;
/* Parse one UTF-8 sequence out of NAME and place the corresponding
   \U escape in BUFFER.  Returns the number of bytes read out of NAME.
   Exactly 10 bytes are written to BUFFER ("\U" followed by eight
   lower-case hex digits); no NUL terminator is added.

   NAME must point at the lead byte of a multi-byte sequence.
   Ill-formed input aborts: that covers a bad continuation byte as
   well as a byte that cannot start a multi-byte sequence (an ASCII
   byte, a lone continuation byte, or a lead byte announcing more
   than six bytes).  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it is the number of
     leading one bits in the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* Ill-formed UTF-8: not the start of a multi-byte sequence.  */
  if (ucn_len < 2 || ucn_len > 6)
    abort ();

  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1224 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1225 already contain the enough space to hold the token's spelling.
1226 Returns a pointer to the character after the last character written.
1227 FORSTRING is true if this is to be the spelling after translation
1228 phase 1 (this is different for UCNs).
1229 FIXME: Would be nice if we didn't need the PFILE argument. */
1230 unsigned char *
1231 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1232 unsigned char *buffer, bool forstring)
1234 switch (TOKEN_SPELL (token))
1236 case SPELL_OPERATOR:
1238 const unsigned char *spelling;
1239 unsigned char c;
1241 if (token->flags & DIGRAPH)
1242 spelling
1243 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1244 else if (token->flags & NAMED_OP)
1245 goto spell_ident;
1246 else
1247 spelling = TOKEN_NAME (token);
1249 while ((c = *spelling++) != '\0')
1250 *buffer++ = c;
1252 break;
1254 spell_ident:
1255 case SPELL_IDENT:
1256 if (forstring)
1258 memcpy (buffer, NODE_NAME (token->val.node),
1259 NODE_LEN (token->val.node));
1260 buffer += NODE_LEN (token->val.node);
1262 else
1264 size_t i;
1265 const unsigned char * name = NODE_NAME (token->val.node);
1267 for (i = 0; i < NODE_LEN (token->val.node); i++)
1268 if (name[i] & ~0x7F)
1270 i += utf8_to_ucn (buffer, name + i) - 1;
1271 buffer += 10;
1273 else
1274 *buffer++ = NODE_NAME (token->val.node)[i];
1276 break;
1278 case SPELL_LITERAL:
1279 memcpy (buffer, token->val.str.text, token->val.str.len);
1280 buffer += token->val.str.len;
1281 break;
1283 case SPELL_NONE:
1284 cpp_error (pfile, CPP_DL_ICE,
1285 "unspellable token %s", TOKEN_NAME (token));
1286 break;
1289 return buffer;
1292 /* Returns TOKEN spelt as a null-terminated string. The string is
1293 freed when the reader is destroyed. Useful for diagnostics. */
1294 unsigned char *
1295 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1297 unsigned int len = cpp_token_len (token) + 1;
1298 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1300 end = cpp_spell_token (pfile, token, start, false);
1301 end[0] = '\0';
1303 return start;
1306 /* Used by C front ends, which really should move to using
1307 cpp_token_as_text. */
1308 const char *
1309 cpp_type2name (enum cpp_ttype type)
1311 return (const char *) token_spellings[type].name;
1314 /* Writes the spelling of token to FP, without any preceding space.
1315 Separated from cpp_spell_token for efficiency - to avoid stdio
1316 double-buffering. */
1317 void
1318 cpp_output_token (const cpp_token *token, FILE *fp)
1320 switch (TOKEN_SPELL (token))
1322 case SPELL_OPERATOR:
1324 const unsigned char *spelling;
1325 int c;
1327 if (token->flags & DIGRAPH)
1328 spelling
1329 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1330 else if (token->flags & NAMED_OP)
1331 goto spell_ident;
1332 else
1333 spelling = TOKEN_NAME (token);
1335 c = *spelling;
1337 putc (c, fp);
1338 while ((c = *++spelling) != '\0');
1340 break;
1342 spell_ident:
1343 case SPELL_IDENT:
1345 size_t i;
1346 const unsigned char * name = NODE_NAME (token->val.node);
1348 for (i = 0; i < NODE_LEN (token->val.node); i++)
1349 if (name[i] & ~0x7F)
1351 unsigned char buffer[10];
1352 i += utf8_to_ucn (buffer, name + i) - 1;
1353 fwrite (buffer, 1, 10, fp);
1355 else
1356 fputc (NODE_NAME (token->val.node)[i], fp);
1358 break;
1360 case SPELL_LITERAL:
1361 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1362 break;
1364 case SPELL_NONE:
1365 /* An error, most probably. */
1366 break;
1370 /* Compare two tokens. */
1372 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1374 if (a->type == b->type && a->flags == b->flags)
1375 switch (TOKEN_SPELL (a))
1377 default: /* Keep compiler happy. */
1378 case SPELL_OPERATOR:
1379 return 1;
1380 case SPELL_NONE:
1381 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1382 case SPELL_IDENT:
1383 return a->val.node == b->val.node;
1384 case SPELL_LITERAL:
1385 return (a->val.str.len == b->val.str.len
1386 && !memcmp (a->val.str.text, b->val.str.text,
1387 a->val.str.len));
1390 return 0;
1393 /* Returns nonzero if a space should be inserted to avoid an
1394 accidental token paste for output. For simplicity, it is
1395 conservative, and occasionally advises a space where one is not
1396 needed, e.g. "." and ".2". */
1398 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1399 const cpp_token *token2)
1401 enum cpp_ttype a = token1->type, b = token2->type;
1402 cppchar_t c;
1404 if (token1->flags & NAMED_OP)
1405 a = CPP_NAME;
1406 if (token2->flags & NAMED_OP)
1407 b = CPP_NAME;
1409 c = EOF;
1410 if (token2->flags & DIGRAPH)
1411 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1412 else if (token_spellings[b].category == SPELL_OPERATOR)
1413 c = token_spellings[b].name[0];
1415 /* Quickly get everything that can paste with an '='. */
1416 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1417 return 1;
1419 switch (a)
1421 case CPP_GREATER: return c == '>' || c == '?';
1422 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1423 case CPP_PLUS: return c == '+';
1424 case CPP_MINUS: return c == '-' || c == '>';
1425 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1426 case CPP_MOD: return c == ':' || c == '>';
1427 case CPP_AND: return c == '&';
1428 case CPP_OR: return c == '|';
1429 case CPP_COLON: return c == ':' || c == '>';
1430 case CPP_DEREF: return c == '*';
1431 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1432 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1433 case CPP_NAME: return ((b == CPP_NUMBER
1434 && name_p (pfile, &token2->val.str))
1435 || b == CPP_NAME
1436 || b == CPP_CHAR || b == CPP_STRING); /* L */
1437 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1438 || c == '.' || c == '+' || c == '-');
1439 /* UCNs */
1440 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1441 && b == CPP_NAME)
1442 || (CPP_OPTION (pfile, objc)
1443 && token1->val.str.text[0] == '@'
1444 && (b == CPP_NAME || b == CPP_STRING)));
1445 default: break;
1448 return 0;
1451 /* Output all the remaining tokens on the current line, and a newline
1452 character, to FP. Leading whitespace is removed. If there are
1453 macros, special token padding is not performed. */
1454 void
1455 cpp_output_line (cpp_reader *pfile, FILE *fp)
1457 const cpp_token *token;
1459 token = cpp_get_token (pfile);
1460 while (token->type != CPP_EOF)
1462 cpp_output_token (token, fp);
1463 token = cpp_get_token (pfile);
1464 if (token->flags & PREV_WHITE)
1465 putc (' ', fp);
1468 putc ('\n', fp);
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the distance between BUFF's
   cur and limit pointers.  Every macro argument is parenthesized so
   that an expression argument expands as a single operand.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1486 /* Create a new allocation buffer. Place the control block at the end
1487 of the buffer, so that buffer overflows will cause immediate chaos. */
1488 static _cpp_buff *
1489 new_buff (size_t len)
1491 _cpp_buff *result;
1492 unsigned char *base;
1494 if (len < MIN_BUFF_SIZE)
1495 len = MIN_BUFF_SIZE;
1496 len = CPP_ALIGN (len);
1498 base = xmalloc (len + sizeof (_cpp_buff));
1499 result = (_cpp_buff *) (base + len);
1500 result->base = base;
1501 result->cur = base;
1502 result->limit = base + len;
1503 result->next = NULL;
1504 return result;
1507 /* Place a chain of unwanted allocation buffers on the free list. */
1508 void
1509 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1511 _cpp_buff *end = buff;
1513 while (end->next)
1514 end = end->next;
1515 end->next = pfile->free_buffs;
1516 pfile->free_buffs = buff;
1519 /* Return a free buffer of size at least MIN_SIZE. */
1520 _cpp_buff *
1521 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1523 _cpp_buff *result, **p;
1525 for (p = &pfile->free_buffs;; p = &(*p)->next)
1527 size_t size;
1529 if (*p == NULL)
1530 return new_buff (min_size);
1531 result = *p;
1532 size = result->limit - result->base;
1533 /* Return a buffer that's big enough, but don't waste one that's
1534 way too big. */
1535 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1536 break;
1539 *p = result->next;
1540 result->next = NULL;
1541 result->cur = result->base;
1542 return result;
1545 /* Creates a new buffer with enough space to hold the uncommitted
1546 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1547 the excess bytes to the new buffer. Chains the new buffer after
1548 BUFF, and returns the new buffer. */
1549 _cpp_buff *
1550 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1552 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1553 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1555 buff->next = new_buff;
1556 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1557 return new_buff;
1560 /* Creates a new buffer with enough space to hold the uncommitted
1561 remaining bytes of the buffer pointed to by BUFF, and at least
1562 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1563 Chains the new buffer before the buffer pointed to by BUFF, and
1564 updates the pointer to point to the new buffer. */
1565 void
1566 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1568 _cpp_buff *new_buff, *old_buff = *pbuff;
1569 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1571 new_buff = _cpp_get_buff (pfile, size);
1572 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1573 new_buff->next = old_buff;
1574 *pbuff = new_buff;
1577 /* Free a chain of buffers starting at BUFF. */
1578 void
1579 _cpp_free_buff (_cpp_buff *buff)
1581 _cpp_buff *next;
1583 for (; buff; buff = next)
1585 next = buff->next;
1586 free (buff->base);
1590 /* Allocate permanent, unaligned storage of length LEN. */
1591 unsigned char *
1592 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1594 _cpp_buff *buff = pfile->u_buff;
1595 unsigned char *result = buff->cur;
1597 if (len > (size_t) (buff->limit - result))
1599 buff = _cpp_get_buff (pfile, len);
1600 buff->next = pfile->u_buff;
1601 pfile->u_buff = buff;
1602 result = buff->cur;
1605 buff->cur = result + len;
1606 return result;
1609 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1610 That buffer is used for growing allocations when saving macro
1611 replacement lists in a #define, and when parsing an answer to an
1612 assertion in #assert, #unassert or #if (and therefore possibly
1613 whilst expanding macros). It therefore must not be used by any
1614 code that they might call: specifically the lexer and the guts of
1615 the macro expander.
1617 All existing other uses clearly fit this restriction: storing
1618 registered pragmas during initialization. */
1619 unsigned char *
1620 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1622 _cpp_buff *buff = pfile->a_buff;
1623 unsigned char *result = buff->cur;
1625 if (len > (size_t) (buff->limit - result))
1627 buff = _cpp_get_buff (pfile, len);
1628 buff->next = pfile->a_buff;
1629 pfile->a_buff = buff;
1630 result = buff->cur;
1633 buff->cur = result + len;
1634 return result;
1637 /* Say which field of TOK is in use. */
1639 enum cpp_token_fld_kind
1640 cpp_token_val_index (cpp_token *tok)
1642 switch (TOKEN_SPELL (tok))
1644 case SPELL_IDENT:
1645 return CPP_TOKEN_FLD_NODE;
1646 case SPELL_LITERAL:
1647 return CPP_TOKEN_FLD_STR;
1648 case SPELL_NONE:
1649 if (tok->type == CPP_MACRO_ARG)
1650 return CPP_TOKEN_FLD_ARG_NO;
1651 else if (tok->type == CPP_PADDING)
1652 return CPP_TOKEN_FLD_SOURCE;
1653 else if (tok->type == CPP_PRAGMA)
1654 return CPP_TOKEN_FLD_STR;
1655 /* else fall through */
1656 default:
1657 return CPP_TOKEN_FLD_NONE;