* config.gcc: Add an extra_header for ARM targets.
[official-gcc.git] / gcc / cpplex.c
blob6933e75035bc11e6fed1d6c2407a553c620b76e8
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
29 enum spell_type
31 SPELL_OPERATOR = 0,
32 SPELL_IDENT,
33 SPELL_LITERAL,
34 SPELL_NONE
37 struct token_spelling
39 enum spell_type category;
40 const unsigned char *name;
43 static const unsigned char *const digraph_spellings[] =
44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U #e },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
49 #undef OP
50 #undef TK
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
56 static int skip_line_comment (cpp_reader *);
57 static void skip_whitespace (cpp_reader *, cppchar_t);
58 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
59 static void lex_number (cpp_reader *, cpp_string *);
60 static bool forms_identifier_p (cpp_reader *, int);
61 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
62 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
63 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
64 unsigned int, enum cpp_ttype);
65 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
66 static int name_p (cpp_reader *, const cpp_string *);
67 static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
68 static tokenrun *next_tokenrun (tokenrun *);
70 static unsigned int hex_digit_value (unsigned int);
71 static _cpp_buff *new_buff (size_t);
74 /* Utility routine:
76 Compares, the token TOKEN to the NUL-terminated string STRING.
77 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
78 int
79 cpp_ideq (const cpp_token *token, const char *string)
81 if (token->type != CPP_NAME)
82 return 0;
84 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
87 /* Record a note TYPE at byte POS into the current cleaned logical
88 line. */
89 static void
90 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
92 if (buffer->notes_used == buffer->notes_cap)
94 buffer->notes_cap = buffer->notes_cap * 2 + 200;
95 buffer->notes = (_cpp_line_note *)
96 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
99 buffer->notes[buffer->notes_used].pos = pos;
100 buffer->notes[buffer->notes_used].type = type;
101 buffer->notes_used++;
104 /* Returns with a logical line that contains no escaped newlines or
105 trigraphs. This is a time-critical inner loop. */
106 void
107 _cpp_clean_line (cpp_reader *pfile)
109 cpp_buffer *buffer;
110 const uchar *s;
111 uchar c, *d, *p;
113 buffer = pfile->buffer;
114 buffer->cur_note = buffer->notes_used = 0;
115 buffer->cur = buffer->line_base = buffer->next_line;
116 buffer->need_line = false;
117 s = buffer->next_line - 1;
119 if (!buffer->from_stage3)
121 d = (uchar *) s;
123 for (;;)
125 c = *++s;
126 *++d = c;
128 if (c == '\n' || c == '\r')
130 /* Handle DOS line endings. */
131 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
132 s++;
133 if (s == buffer->rlimit)
134 break;
136 /* Escaped? */
137 p = d;
138 while (p != buffer->next_line && is_nvspace (p[-1]))
139 p--;
140 if (p == buffer->next_line || p[-1] != '\\')
141 break;
143 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
147 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 /* Add a note regardless, for the benefit of -Wtrigraphs. */
150 add_line_note (buffer, d, s[2]);
151 if (CPP_OPTION (pfile, trigraphs))
153 *d = _cpp_trigraph_map[s[2]];
154 s += 2;
159 else
162 s++;
163 while (*s != '\n' && *s != '\r');
164 d = (uchar *) s;
166 /* Handle DOS line endings. */
167 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
168 s++;
171 *d = '\n';
172 /* A sentinel note that should never be processed. */
173 add_line_note (buffer, d + 1, '\n');
174 buffer->next_line = s + 1;
177 /* Return true if the trigraph indicated by NOTE should be warned
178 about in a comment. */
179 static bool
180 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
182 const uchar *p;
184 /* Within comments we don't warn about trigraphs, unless the
185 trigraph forms an escaped newline, as that may change
186 behavior. */
187 if (note->type != '/')
188 return false;
190 /* If -trigraphs, then this was an escaped newline iff the next note
191 is coincident. */
192 if (CPP_OPTION (pfile, trigraphs))
193 return note[1].pos == note->pos;
195 /* Otherwise, see if this forms an escaped newline. */
196 p = note->pos + 3;
197 while (is_nvspace (*p))
198 p++;
200 /* There might have been escaped newlines between the trigraph and the
201 newline we found. Hence the position test. */
202 return (*p == '\n' && p < note[1].pos);
205 /* Process the notes created by add_line_note as far as the current
206 location. */
207 void
208 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
210 cpp_buffer *buffer = pfile->buffer;
212 for (;;)
214 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
215 unsigned int col;
217 if (note->pos > buffer->cur)
218 break;
220 buffer->cur_note++;
221 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
223 if (note->type == '\\' || note->type == ' ')
225 if (note->type == ' ' && !in_comment)
226 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
227 "backslash and newline separated by space");
229 if (buffer->next_line > buffer->rlimit)
231 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
232 "backslash-newline at end of file");
233 /* Prevent "no newline at end of file" warning. */
234 buffer->next_line = buffer->rlimit;
237 buffer->line_base = note->pos;
238 pfile->line++;
240 else if (_cpp_trigraph_map[note->type])
242 if (CPP_OPTION (pfile, warn_trigraphs)
243 && (!in_comment || warn_in_comment (pfile, note)))
245 if (CPP_OPTION (pfile, trigraphs))
246 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
247 "trigraph ??%c converted to %c",
248 note->type,
249 (int) _cpp_trigraph_map[note->type]);
250 else
251 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
252 "trigraph ??%c ignored",
253 note->type);
256 else
257 abort ();
261 /* Skip a C-style block comment. We find the end of the comment by
262 seeing if an asterisk is before every '/' we encounter. Returns
263 nonzero if comment terminated by EOF, zero otherwise.
265 Buffer->cur points to the initial asterisk of the comment. */
266 bool
267 _cpp_skip_block_comment (cpp_reader *pfile)
269 cpp_buffer *buffer = pfile->buffer;
270 cppchar_t c;
272 buffer->cur++;
273 if (*buffer->cur == '/')
274 buffer->cur++;
276 for (;;)
278 c = *buffer->cur++;
280 /* People like decorating comments with '*', so check for '/'
281 instead for efficiency. */
282 if (c == '/')
284 if (buffer->cur[-2] == '*')
285 break;
287 /* Warn about potential nested comments, but not if the '/'
288 comes immediately before the true comment delimiter.
289 Don't bother to get it right across escaped newlines. */
290 if (CPP_OPTION (pfile, warn_comments)
291 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
292 cpp_error_with_line (pfile, DL_WARNING,
293 pfile->line, CPP_BUF_COL (buffer),
294 "\"/*\" within comment");
296 else if (c == '\n')
298 buffer->cur--;
299 _cpp_process_line_notes (pfile, true);
300 if (buffer->next_line >= buffer->rlimit)
301 return true;
302 _cpp_clean_line (pfile);
303 pfile->line++;
307 _cpp_process_line_notes (pfile, true);
308 return false;
311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
312 terminating newline. Handles escaped newlines. Returns nonzero
313 if a multiline comment. */
314 static int
315 skip_line_comment (cpp_reader *pfile)
317 cpp_buffer *buffer = pfile->buffer;
318 unsigned int orig_line = pfile->line;
320 while (*buffer->cur != '\n')
321 buffer->cur++;
323 _cpp_process_line_notes (pfile, true);
324 return orig_line != pfile->line;
327 /* Skips whitespace, saving the next non-whitespace character. */
328 static void
329 skip_whitespace (cpp_reader *pfile, cppchar_t c)
331 cpp_buffer *buffer = pfile->buffer;
332 bool saw_NUL = false;
336 /* Horizontal space always OK. */
337 if (c == ' ' || c == '\t')
339 /* Just \f \v or \0 left. */
340 else if (c == '\0')
341 saw_NUL = true;
342 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
343 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
344 CPP_BUF_COL (buffer),
345 "%s in preprocessing directive",
346 c == '\f' ? "form feed" : "vertical tab");
348 c = *buffer->cur++;
350 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
351 while (is_nvspace (c));
353 if (saw_NUL)
354 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
356 buffer->cur--;
359 /* See if the characters of a number token are valid in a name (no
360 '.', '+' or '-'). */
361 static int
362 name_p (cpp_reader *pfile, const cpp_string *string)
364 unsigned int i;
366 for (i = 0; i < string->len; i++)
367 if (!is_idchar (string->text[i]))
368 return 0;
370 return 1;
373 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
374 an identifier. FIRST is TRUE if this starts an identifier. */
375 static bool
376 forms_identifier_p (cpp_reader *pfile, int first)
378 cpp_buffer *buffer = pfile->buffer;
380 if (*buffer->cur == '$')
382 if (!CPP_OPTION (pfile, dollars_in_ident))
383 return false;
385 buffer->cur++;
386 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
388 CPP_OPTION (pfile, warn_dollars) = 0;
389 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
392 return true;
395 /* Is this a syntactically valid UCN? */
396 if (0 && *buffer->cur == '\\'
397 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
399 buffer->cur += 2;
400 if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
401 return true;
402 buffer->cur -= 2;
405 return false;
408 /* Lex an identifier starting at BUFFER->CUR - 1. */
409 static cpp_hashnode *
410 lex_identifier (cpp_reader *pfile, const uchar *base)
412 cpp_hashnode *result;
413 const uchar *cur;
417 cur = pfile->buffer->cur;
419 /* N.B. ISIDNUM does not include $. */
420 while (ISIDNUM (*cur))
421 cur++;
423 pfile->buffer->cur = cur;
425 while (forms_identifier_p (pfile, false));
427 result = (cpp_hashnode *)
428 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
430 /* Rarely, identifiers require diagnostics when lexed. */
431 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
432 && !pfile->state.skipping, 0))
434 /* It is allowed to poison the same identifier twice. */
435 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
436 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
437 NODE_NAME (result));
439 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
440 replacement list of a variadic macro. */
441 if (result == pfile->spec_nodes.n__VA_ARGS__
442 && !pfile->state.va_args_ok)
443 cpp_error (pfile, DL_PEDWARN,
444 "__VA_ARGS__ can only appear in the expansion"
445 " of a C99 variadic macro");
448 return result;
451 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
452 static void
453 lex_number (cpp_reader *pfile, cpp_string *number)
455 const uchar *cur;
456 const uchar *base;
457 uchar *dest;
459 base = pfile->buffer->cur - 1;
462 cur = pfile->buffer->cur;
464 /* N.B. ISIDNUM does not include $. */
465 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
466 cur++;
468 pfile->buffer->cur = cur;
470 while (forms_identifier_p (pfile, false));
472 number->len = cur - base;
473 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
474 memcpy (dest, base, number->len);
475 dest[number->len] = '\0';
476 number->text = dest;
479 /* Create a token of type TYPE with a literal spelling. */
480 static void
481 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
482 unsigned int len, enum cpp_ttype type)
484 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
486 memcpy (dest, base, len);
487 dest[len] = '\0';
488 token->type = type;
489 token->val.str.len = len;
490 token->val.str.text = dest;
493 /* Lexes a string, character constant, or angle-bracketed header file
494 name. The stored string contains the spelling, including opening
495 quote and leading any leading 'L'. It returns the type of the
496 literal, or CPP_OTHER if it was not properly terminated.
498 The spelling is NUL-terminated, but it is not guaranteed that this
499 is the first NUL since embedded NULs are preserved. */
500 static void
501 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
503 bool saw_NUL = false;
504 const uchar *cur;
505 cppchar_t terminator;
506 enum cpp_ttype type;
508 cur = base;
509 terminator = *cur++;
510 if (terminator == 'L')
511 terminator = *cur++;
512 if (terminator == '\"')
513 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
514 else if (terminator == '\'')
515 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
516 else
517 terminator = '>', type = CPP_HEADER_NAME;
519 for (;;)
521 cppchar_t c = *cur++;
523 /* In #include-style directives, terminators are not escapable. */
524 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
525 cur++;
526 else if (c == terminator)
527 break;
528 else if (c == '\n')
530 cur--;
531 type = CPP_OTHER;
532 break;
534 else if (c == '\0')
535 saw_NUL = true;
538 if (saw_NUL && !pfile->state.skipping)
539 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
541 pfile->buffer->cur = cur;
542 create_literal (pfile, token, base, cur - base, type);
545 /* The stored comment includes the comment start and any terminator. */
546 static void
547 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
548 cppchar_t type)
550 unsigned char *buffer;
551 unsigned int len, clen;
553 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
555 /* C++ comments probably (not definitely) have moved past a new
556 line, which we don't want to save in the comment. */
557 if (is_vspace (pfile->buffer->cur[-1]))
558 len--;
560 /* If we are currently in a directive, then we need to store all
561 C++ comments as C comments internally, and so we need to
562 allocate a little extra space in that case.
564 Note that the only time we encounter a directive here is
565 when we are saving comments in a "#define". */
566 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
568 buffer = _cpp_unaligned_alloc (pfile, clen);
570 token->type = CPP_COMMENT;
571 token->val.str.len = clen;
572 token->val.str.text = buffer;
574 buffer[0] = '/';
575 memcpy (buffer + 1, from, len - 1);
577 /* Finish conversion to a C comment, if necessary. */
578 if (pfile->state.in_directive && type == '/')
580 buffer[1] = '*';
581 buffer[clen - 2] = '*';
582 buffer[clen - 1] = '/';
586 /* Allocate COUNT tokens for RUN. */
587 void
588 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
590 run->base = xnewvec (cpp_token, count);
591 run->limit = run->base + count;
592 run->next = NULL;
595 /* Returns the next tokenrun, or creates one if there is none. */
596 static tokenrun *
597 next_tokenrun (tokenrun *run)
599 if (run->next == NULL)
601 run->next = xnew (tokenrun);
602 run->next->prev = run;
603 _cpp_init_tokenrun (run->next, 250);
606 return run->next;
609 /* Allocate a single token that is invalidated at the same time as the
610 rest of the tokens on the line. Has its line and col set to the
611 same as the last lexed token, so that diagnostics appear in the
612 right place. */
613 cpp_token *
614 _cpp_temp_token (cpp_reader *pfile)
616 cpp_token *old, *result;
618 old = pfile->cur_token - 1;
619 if (pfile->cur_token == pfile->cur_run->limit)
621 pfile->cur_run = next_tokenrun (pfile->cur_run);
622 pfile->cur_token = pfile->cur_run->base;
625 result = pfile->cur_token++;
626 result->line = old->line;
627 result->col = old->col;
628 return result;
631 /* Lex a token into RESULT (external interface). Takes care of issues
632 like directive handling, token lookahead, multiple include
633 optimization and skipping. */
634 const cpp_token *
635 _cpp_lex_token (cpp_reader *pfile)
637 cpp_token *result;
639 for (;;)
641 if (pfile->cur_token == pfile->cur_run->limit)
643 pfile->cur_run = next_tokenrun (pfile->cur_run);
644 pfile->cur_token = pfile->cur_run->base;
647 if (pfile->lookaheads)
649 pfile->lookaheads--;
650 result = pfile->cur_token++;
652 else
653 result = _cpp_lex_direct (pfile);
655 if (result->flags & BOL)
657 /* Is this a directive. If _cpp_handle_directive returns
658 false, it is an assembler #. */
659 if (result->type == CPP_HASH
660 /* 6.10.3 p 11: Directives in a list of macro arguments
661 gives undefined behavior. This implementation
662 handles the directive as normal. */
663 && pfile->state.parsing_args != 1
664 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
665 continue;
666 if (pfile->cb.line_change && !pfile->state.skipping)
667 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
670 /* We don't skip tokens in directives. */
671 if (pfile->state.in_directive)
672 break;
674 /* Outside a directive, invalidate controlling macros. At file
675 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
676 get here and MI optimization works. */
677 pfile->mi_valid = false;
679 if (!pfile->state.skipping || result->type == CPP_EOF)
680 break;
683 return result;
686 /* Returns true if a fresh line has been loaded. */
687 bool
688 _cpp_get_fresh_line (cpp_reader *pfile)
690 /* We can't get a new line until we leave the current directive. */
691 if (pfile->state.in_directive)
692 return false;
694 for (;;)
696 cpp_buffer *buffer = pfile->buffer;
698 if (!buffer->need_line)
699 return true;
701 if (buffer->next_line < buffer->rlimit)
703 _cpp_clean_line (pfile);
704 return true;
707 /* First, get out of parsing arguments state. */
708 if (pfile->state.parsing_args)
709 return false;
711 /* End of buffer. Non-empty files should end in a newline. */
712 if (buffer->buf != buffer->rlimit
713 && buffer->next_line > buffer->rlimit
714 && !buffer->from_stage3)
716 /* Only warn once. */
717 buffer->next_line = buffer->rlimit;
718 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
719 CPP_BUF_COLUMN (buffer, buffer->cur),
720 "no newline at end of file");
723 if (!buffer->prev)
724 return false;
726 if (buffer->return_at_eof)
728 _cpp_pop_buffer (pfile);
729 return false;
732 _cpp_pop_buffer (pfile);
736 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
737 do \
739 result->type = ELSE_TYPE; \
740 if (*buffer->cur == CHAR) \
741 buffer->cur++, result->type = THEN_TYPE; \
743 while (0)
745 /* Lex a token into pfile->cur_token, which is also incremented, to
746 get diagnostics pointing to the correct location.
748 Does not handle issues such as token lookahead, multiple-include
749 optimisation, directives, skipping etc. This function is only
750 suitable for use by _cpp_lex_token, and in special cases like
751 lex_expansion_token which doesn't care for any of these issues.
753 When meeting a newline, returns CPP_EOF if parsing a directive,
754 otherwise returns to the start of the token buffer if permissible.
755 Returns the location of the lexed token. */
756 cpp_token *
757 _cpp_lex_direct (cpp_reader *pfile)
759 cppchar_t c;
760 cpp_buffer *buffer;
761 const unsigned char *comment_start;
762 cpp_token *result = pfile->cur_token++;
764 fresh_line:
765 result->flags = 0;
766 if (pfile->buffer->need_line)
768 if (!_cpp_get_fresh_line (pfile))
770 result->type = CPP_EOF;
771 if (!pfile->state.in_directive)
773 /* Tell the compiler the line number of the EOF token. */
774 result->line = pfile->line;
775 result->flags = BOL;
777 return result;
779 if (!pfile->keep_tokens)
781 pfile->cur_run = &pfile->base_run;
782 result = pfile->base_run.base;
783 pfile->cur_token = result + 1;
785 result->flags = BOL;
786 if (pfile->state.parsing_args == 2)
787 result->flags |= PREV_WHITE;
789 buffer = pfile->buffer;
790 update_tokens_line:
791 result->line = pfile->line;
793 skipped_white:
794 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
795 && !pfile->overlaid_buffer)
797 _cpp_process_line_notes (pfile, false);
798 result->line = pfile->line;
800 c = *buffer->cur++;
801 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
803 switch (c)
805 case ' ': case '\t': case '\f': case '\v': case '\0':
806 result->flags |= PREV_WHITE;
807 skip_whitespace (pfile, c);
808 goto skipped_white;
810 case '\n':
811 pfile->line++;
812 buffer->need_line = true;
813 goto fresh_line;
815 case '0': case '1': case '2': case '3': case '4':
816 case '5': case '6': case '7': case '8': case '9':
817 result->type = CPP_NUMBER;
818 lex_number (pfile, &result->val.str);
819 break;
821 case 'L':
822 /* 'L' may introduce wide characters or strings. */
823 if (*buffer->cur == '\'' || *buffer->cur == '"')
825 lex_string (pfile, result, buffer->cur - 1);
826 break;
828 /* Fall through. */
830 case '_':
831 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
832 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
833 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
834 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
835 case 'y': case 'z':
836 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
837 case 'G': case 'H': case 'I': case 'J': case 'K':
838 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
839 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
840 case 'Y': case 'Z':
841 result->type = CPP_NAME;
842 result->val.node = lex_identifier (pfile, buffer->cur - 1);
844 /* Convert named operators to their proper types. */
845 if (result->val.node->flags & NODE_OPERATOR)
847 result->flags |= NAMED_OP;
848 result->type = result->val.node->directive_index;
850 break;
852 case '\'':
853 case '"':
854 lex_string (pfile, result, buffer->cur - 1);
855 break;
857 case '/':
858 /* A potential block or line comment. */
859 comment_start = buffer->cur;
860 c = *buffer->cur;
862 if (c == '*')
864 if (_cpp_skip_block_comment (pfile))
865 cpp_error (pfile, DL_ERROR, "unterminated comment");
867 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
868 || CPP_IN_SYSTEM_HEADER (pfile)))
870 /* Warn about comments only if pedantically GNUC89, and not
871 in system headers. */
872 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
873 && ! buffer->warned_cplusplus_comments)
875 cpp_error (pfile, DL_PEDWARN,
876 "C++ style comments are not allowed in ISO C90");
877 cpp_error (pfile, DL_PEDWARN,
878 "(this will be reported only once per input file)");
879 buffer->warned_cplusplus_comments = 1;
882 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
883 cpp_error (pfile, DL_WARNING, "multi-line comment");
885 else if (c == '=')
887 buffer->cur++;
888 result->type = CPP_DIV_EQ;
889 break;
891 else
893 result->type = CPP_DIV;
894 break;
897 if (!pfile->state.save_comments)
899 result->flags |= PREV_WHITE;
900 goto update_tokens_line;
903 /* Save the comment as a token in its own right. */
904 save_comment (pfile, result, comment_start, c);
905 break;
907 case '<':
908 if (pfile->state.angled_headers)
910 lex_string (pfile, result, buffer->cur - 1);
911 break;
914 result->type = CPP_LESS;
915 if (*buffer->cur == '=')
916 buffer->cur++, result->type = CPP_LESS_EQ;
917 else if (*buffer->cur == '<')
919 buffer->cur++;
920 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
922 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
924 buffer->cur++;
925 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
927 else if (CPP_OPTION (pfile, digraphs))
929 if (*buffer->cur == ':')
931 buffer->cur++;
932 result->flags |= DIGRAPH;
933 result->type = CPP_OPEN_SQUARE;
935 else if (*buffer->cur == '%')
937 buffer->cur++;
938 result->flags |= DIGRAPH;
939 result->type = CPP_OPEN_BRACE;
942 break;
944 case '>':
945 result->type = CPP_GREATER;
946 if (*buffer->cur == '=')
947 buffer->cur++, result->type = CPP_GREATER_EQ;
948 else if (*buffer->cur == '>')
950 buffer->cur++;
951 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
953 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
955 buffer->cur++;
956 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
958 break;
960 case '%':
961 result->type = CPP_MOD;
962 if (*buffer->cur == '=')
963 buffer->cur++, result->type = CPP_MOD_EQ;
964 else if (CPP_OPTION (pfile, digraphs))
966 if (*buffer->cur == ':')
968 buffer->cur++;
969 result->flags |= DIGRAPH;
970 result->type = CPP_HASH;
971 if (*buffer->cur == '%' && buffer->cur[1] == ':')
972 buffer->cur += 2, result->type = CPP_PASTE;
974 else if (*buffer->cur == '>')
976 buffer->cur++;
977 result->flags |= DIGRAPH;
978 result->type = CPP_CLOSE_BRACE;
981 break;
983 case '.':
984 result->type = CPP_DOT;
985 if (ISDIGIT (*buffer->cur))
987 result->type = CPP_NUMBER;
988 lex_number (pfile, &result->val.str);
990 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
991 buffer->cur += 2, result->type = CPP_ELLIPSIS;
992 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
993 buffer->cur++, result->type = CPP_DOT_STAR;
994 break;
996 case '+':
997 result->type = CPP_PLUS;
998 if (*buffer->cur == '+')
999 buffer->cur++, result->type = CPP_PLUS_PLUS;
1000 else if (*buffer->cur == '=')
1001 buffer->cur++, result->type = CPP_PLUS_EQ;
1002 break;
1004 case '-':
1005 result->type = CPP_MINUS;
1006 if (*buffer->cur == '>')
1008 buffer->cur++;
1009 result->type = CPP_DEREF;
1010 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1011 buffer->cur++, result->type = CPP_DEREF_STAR;
1013 else if (*buffer->cur == '-')
1014 buffer->cur++, result->type = CPP_MINUS_MINUS;
1015 else if (*buffer->cur == '=')
1016 buffer->cur++, result->type = CPP_MINUS_EQ;
1017 break;
1019 case '&':
1020 result->type = CPP_AND;
1021 if (*buffer->cur == '&')
1022 buffer->cur++, result->type = CPP_AND_AND;
1023 else if (*buffer->cur == '=')
1024 buffer->cur++, result->type = CPP_AND_EQ;
1025 break;
1027 case '|':
1028 result->type = CPP_OR;
1029 if (*buffer->cur == '|')
1030 buffer->cur++, result->type = CPP_OR_OR;
1031 else if (*buffer->cur == '=')
1032 buffer->cur++, result->type = CPP_OR_EQ;
1033 break;
1035 case ':':
1036 result->type = CPP_COLON;
1037 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1038 buffer->cur++, result->type = CPP_SCOPE;
1039 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1041 buffer->cur++;
1042 result->flags |= DIGRAPH;
1043 result->type = CPP_CLOSE_SQUARE;
1045 break;
1047 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1048 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1049 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1050 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1051 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1053 case '?': result->type = CPP_QUERY; break;
1054 case '~': result->type = CPP_COMPL; break;
1055 case ',': result->type = CPP_COMMA; break;
1056 case '(': result->type = CPP_OPEN_PAREN; break;
1057 case ')': result->type = CPP_CLOSE_PAREN; break;
1058 case '[': result->type = CPP_OPEN_SQUARE; break;
1059 case ']': result->type = CPP_CLOSE_SQUARE; break;
1060 case '{': result->type = CPP_OPEN_BRACE; break;
1061 case '}': result->type = CPP_CLOSE_BRACE; break;
1062 case ';': result->type = CPP_SEMICOLON; break;
1064 /* @ is a punctuator in Objective-C. */
1065 case '@': result->type = CPP_ATSIGN; break;
1067 case '$':
1068 case '\\':
1070 const uchar *base = --buffer->cur;
1072 if (forms_identifier_p (pfile, true))
1074 result->type = CPP_NAME;
1075 result->val.node = lex_identifier (pfile, base);
1076 break;
1078 buffer->cur++;
1081 default:
1082 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1083 break;
1086 return result;
1089 /* An upper bound on the number of bytes needed to spell TOKEN.
1090 Does not include preceding whitespace. */
1091 unsigned int
1092 cpp_token_len (const cpp_token *token)
1094 unsigned int len;
1096 switch (TOKEN_SPELL (token))
1098 default: len = 4; break;
1099 case SPELL_LITERAL: len = token->val.str.len; break;
1100 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1103 return len;
1106 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1107 already contain the enough space to hold the token's spelling.
1108 Returns a pointer to the character after the last character written.
1109 FIXME: Would be nice if we didn't need the PFILE argument. */
1110 unsigned char *
1111 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1112 unsigned char *buffer)
1114 switch (TOKEN_SPELL (token))
1116 case SPELL_OPERATOR:
1118 const unsigned char *spelling;
1119 unsigned char c;
1121 if (token->flags & DIGRAPH)
1122 spelling
1123 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1124 else if (token->flags & NAMED_OP)
1125 goto spell_ident;
1126 else
1127 spelling = TOKEN_NAME (token);
1129 while ((c = *spelling++) != '\0')
1130 *buffer++ = c;
1132 break;
1134 spell_ident:
1135 case SPELL_IDENT:
1136 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1137 buffer += NODE_LEN (token->val.node);
1138 break;
1140 case SPELL_LITERAL:
1141 memcpy (buffer, token->val.str.text, token->val.str.len);
1142 buffer += token->val.str.len;
1143 break;
1145 case SPELL_NONE:
1146 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1147 break;
1150 return buffer;
1153 /* Returns TOKEN spelt as a null-terminated string. The string is
1154 freed when the reader is destroyed. Useful for diagnostics. */
1155 unsigned char *
1156 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1158 unsigned int len = cpp_token_len (token) + 1;
1159 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1161 end = cpp_spell_token (pfile, token, start);
1162 end[0] = '\0';
1164 return start;
1167 /* Used by C front ends, which really should move to using
1168 cpp_token_as_text. */
1169 const char *
1170 cpp_type2name (enum cpp_ttype type)
1172 return (const char *) token_spellings[type].name;
1175 /* Writes the spelling of token to FP, without any preceding space.
1176 Separated from cpp_spell_token for efficiency - to avoid stdio
1177 double-buffering. */
1178 void
1179 cpp_output_token (const cpp_token *token, FILE *fp)
1181 switch (TOKEN_SPELL (token))
1183 case SPELL_OPERATOR:
1185 const unsigned char *spelling;
1186 int c;
1188 if (token->flags & DIGRAPH)
1189 spelling
1190 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1191 else if (token->flags & NAMED_OP)
1192 goto spell_ident;
1193 else
1194 spelling = TOKEN_NAME (token);
1196 c = *spelling;
1198 putc (c, fp);
1199 while ((c = *++spelling) != '\0');
1201 break;
1203 spell_ident:
1204 case SPELL_IDENT:
1205 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1206 break;
1208 case SPELL_LITERAL:
1209 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1210 break;
1212 case SPELL_NONE:
1213 /* An error, most probably. */
1214 break;
1218 /* Compare two tokens. */
1220 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1222 if (a->type == b->type && a->flags == b->flags)
1223 switch (TOKEN_SPELL (a))
1225 default: /* Keep compiler happy. */
1226 case SPELL_OPERATOR:
1227 return 1;
1228 case SPELL_NONE:
1229 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1230 case SPELL_IDENT:
1231 return a->val.node == b->val.node;
1232 case SPELL_LITERAL:
1233 return (a->val.str.len == b->val.str.len
1234 && !memcmp (a->val.str.text, b->val.str.text,
1235 a->val.str.len));
1238 return 0;
1241 /* Returns nonzero if a space should be inserted to avoid an
1242 accidental token paste for output. For simplicity, it is
1243 conservative, and occasionally advises a space where one is not
1244 needed, e.g. "." and ".2". */
1246 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1247 const cpp_token *token2)
1249 enum cpp_ttype a = token1->type, b = token2->type;
1250 cppchar_t c;
1252 if (token1->flags & NAMED_OP)
1253 a = CPP_NAME;
1254 if (token2->flags & NAMED_OP)
1255 b = CPP_NAME;
1257 c = EOF;
1258 if (token2->flags & DIGRAPH)
1259 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1260 else if (token_spellings[b].category == SPELL_OPERATOR)
1261 c = token_spellings[b].name[0];
1263 /* Quickly get everything that can paste with an '='. */
1264 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1265 return 1;
1267 switch (a)
1269 case CPP_GREATER: return c == '>' || c == '?';
1270 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1271 case CPP_PLUS: return c == '+';
1272 case CPP_MINUS: return c == '-' || c == '>';
1273 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1274 case CPP_MOD: return c == ':' || c == '>';
1275 case CPP_AND: return c == '&';
1276 case CPP_OR: return c == '|';
1277 case CPP_COLON: return c == ':' || c == '>';
1278 case CPP_DEREF: return c == '*';
1279 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1280 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1281 case CPP_NAME: return ((b == CPP_NUMBER
1282 && name_p (pfile, &token2->val.str))
1283 || b == CPP_NAME
1284 || b == CPP_CHAR || b == CPP_STRING); /* L */
1285 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1286 || c == '.' || c == '+' || c == '-');
1287 /* UCNs */
1288 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1289 && b == CPP_NAME)
1290 || (CPP_OPTION (pfile, objc)
1291 && token1->val.str.text[0] == '@'
1292 && (b == CPP_NAME || b == CPP_STRING)));
1293 default: break;
1296 return 0;
1299 /* Output all the remaining tokens on the current line, and a newline
1300 character, to FP. Leading whitespace is removed. If there are
1301 macros, special token padding is not performed. */
1302 void
1303 cpp_output_line (cpp_reader *pfile, FILE *fp)
1305 const cpp_token *token;
1307 token = cpp_get_token (pfile);
1308 while (token->type != CPP_EOF)
1310 cpp_output_token (token, fp);
1311 token = cpp_get_token (pfile);
1312 if (token->flags & PREV_WHITE)
1313 putc (' ', fp);
1316 putc ('\n', fp);
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (unsigned int c)
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1329 /* Read a possible universal character name starting at *PSTR. */
1330 static cppchar_t
1331 maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
1333 cppchar_t result, c = (*pstr)[-1];
1335 result = _cpp_valid_ucn (pfile, pstr, false);
1336 if (result)
1338 if (CPP_WTRADITIONAL (pfile))
1339 cpp_error (pfile, DL_WARNING,
1340 "the meaning of '\\%c' is different in traditional C",
1341 (int) c);
1343 if (CPP_OPTION (pfile, EBCDIC))
1345 cpp_error (pfile, DL_ERROR,
1346 "universal character with an EBCDIC target");
1347 result = 0x3f; /* EBCDIC invalid character */
1351 return result;
1354 /* Returns the value of an escape sequence, truncated to the correct
1355 target precision. PSTR points to the input pointer, which is just
1356 after the backslash. LIMIT is how much text we have. WIDE is true
1357 if the escape sequence is part of a wide character constant or
1358 string literal. Handles all relevant diagnostics. */
1359 cppchar_t
1360 cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
1361 const unsigned char *limit, int wide)
1363 /* Values of \a \b \e \f \n \r \t \v respectively. */
1364 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1365 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1367 int unknown = 0;
1368 const unsigned char *str = *pstr, *charconsts;
1369 cppchar_t c, ucn, mask;
1370 unsigned int width;
1372 if (CPP_OPTION (pfile, EBCDIC))
1373 charconsts = ebcdic;
1374 else
1375 charconsts = ascii;
1377 if (wide)
1378 width = CPP_OPTION (pfile, wchar_precision);
1379 else
1380 width = CPP_OPTION (pfile, char_precision);
1381 if (width < BITS_PER_CPPCHAR_T)
1382 mask = ((cppchar_t) 1 << width) - 1;
1383 else
1384 mask = ~0;
1386 c = *str++;
1387 switch (c)
1389 case '\\': case '\'': case '"': case '?': break;
1390 case 'b': c = charconsts[1]; break;
1391 case 'f': c = charconsts[3]; break;
1392 case 'n': c = charconsts[4]; break;
1393 case 'r': c = charconsts[5]; break;
1394 case 't': c = charconsts[6]; break;
1395 case 'v': c = charconsts[7]; break;
1397 case '(': case '{': case '[': case '%':
1398 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1399 '\%' is used to prevent SCCS from getting confused. */
1400 unknown = CPP_PEDANTIC (pfile);
1401 break;
1403 case 'a':
1404 if (CPP_WTRADITIONAL (pfile))
1405 cpp_error (pfile, DL_WARNING,
1406 "the meaning of '\\a' is different in traditional C");
1407 c = charconsts[0];
1408 break;
1410 case 'e': case 'E':
1411 if (CPP_PEDANTIC (pfile))
1412 cpp_error (pfile, DL_PEDWARN,
1413 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1414 c = charconsts[2];
1415 break;
1417 case 'u': case 'U':
1418 ucn = maybe_read_ucn (pfile, &str);
1419 if (ucn)
1420 c = ucn;
1421 else
1422 unknown = true;
1423 break;
1425 case 'x':
1426 if (CPP_WTRADITIONAL (pfile))
1427 cpp_error (pfile, DL_WARNING,
1428 "the meaning of '\\x' is different in traditional C");
1431 cppchar_t i = 0, overflow = 0;
1432 int digits_found = 0;
1434 while (str < limit)
1436 c = *str;
1437 if (! ISXDIGIT (c))
1438 break;
1439 str++;
1440 overflow |= i ^ (i << 4 >> 4);
1441 i = (i << 4) + hex_digit_value (c);
1442 digits_found = 1;
1445 if (!digits_found)
1446 cpp_error (pfile, DL_ERROR,
1447 "\\x used with no following hex digits");
1449 if (overflow | (i != (i & mask)))
1451 cpp_error (pfile, DL_PEDWARN,
1452 "hex escape sequence out of range");
1453 i &= mask;
1455 c = i;
1457 break;
1459 case '0': case '1': case '2': case '3':
1460 case '4': case '5': case '6': case '7':
1462 size_t count = 0;
1463 cppchar_t i = c - '0';
1465 while (str < limit && ++count < 3)
1467 c = *str;
1468 if (c < '0' || c > '7')
1469 break;
1470 str++;
1471 i = (i << 3) + c - '0';
1474 if (i != (i & mask))
1476 cpp_error (pfile, DL_PEDWARN,
1477 "octal escape sequence out of range");
1478 i &= mask;
1480 c = i;
1482 break;
1484 default:
1485 unknown = 1;
1486 break;
1489 if (unknown)
1491 if (ISGRAPH (c))
1492 cpp_error (pfile, DL_PEDWARN,
1493 "unknown escape sequence '\\%c'", (int) c);
1494 else
1495 cpp_error (pfile, DL_PEDWARN,
1496 "unknown escape sequence: '\\%03o'", (int) c);
1499 if (c > mask)
1501 cpp_error (pfile, DL_PEDWARN,
1502 "escape sequence out of range for its type");
1503 c &= mask;
1506 *pstr = str;
1507 return c;
1510 /* Interpret a (possibly wide) character constant in TOKEN.
1511 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1512 points to a variable that is filled in with the number of
1513 characters seen, and UNSIGNEDP to a variable that indicates whether
1514 the result has signed type. */
1515 cppchar_t
1516 cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
1517 unsigned int *pchars_seen, int *unsignedp)
1519 const unsigned char *str, *limit;
1520 unsigned int chars_seen = 0;
1521 size_t width, max_chars;
1522 cppchar_t c, mask, result = 0;
1523 bool unsigned_p;
1525 str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1526 limit = token->val.str.text + token->val.str.len - 1;
1528 if (token->type == CPP_CHAR)
1530 width = CPP_OPTION (pfile, char_precision);
1531 max_chars = CPP_OPTION (pfile, int_precision) / width;
1532 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1534 else
1536 width = CPP_OPTION (pfile, wchar_precision);
1537 max_chars = 1;
1538 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1541 if (width < BITS_PER_CPPCHAR_T)
1542 mask = ((cppchar_t) 1 << width) - 1;
1543 else
1544 mask = ~0;
1546 while (str < limit)
1548 c = *str++;
1550 if (c == '\\')
1551 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1553 #ifdef MAP_CHARACTER
1554 if (ISPRINT (c))
1555 c = MAP_CHARACTER (c);
1556 #endif
1558 chars_seen++;
1560 /* Truncate the character, scale the result and merge the two. */
1561 c &= mask;
1562 if (width < BITS_PER_CPPCHAR_T)
1563 result = (result << width) | c;
1564 else
1565 result = c;
1568 if (chars_seen == 0)
1569 cpp_error (pfile, DL_ERROR, "empty character constant");
1570 else if (chars_seen > 1)
1572 /* Multichar charconsts are of type int and therefore signed. */
1573 unsigned_p = 0;
1575 if (chars_seen > max_chars)
1577 chars_seen = max_chars;
1578 cpp_error (pfile, DL_WARNING,
1579 "character constant too long for its type");
1581 else if (CPP_OPTION (pfile, warn_multichar))
1582 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1585 /* Sign-extend or truncate the constant to cppchar_t. The value is
1586 in WIDTH bits, but for multi-char charconsts it's value is the
1587 full target type's width. */
1588 if (chars_seen > 1)
1589 width *= max_chars;
1590 if (width < BITS_PER_CPPCHAR_T)
1592 mask = ((cppchar_t) 1 << width) - 1;
1593 if (unsigned_p || !(result & (1 << (width - 1))))
1594 result &= mask;
1595 else
1596 result |= ~mask;
1599 *pchars_seen = chars_seen;
1600 *unsignedp = unsigned_p;
1601 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free-list buffer
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that
   expression arguments expand safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1619 /* Create a new allocation buffer. Place the control block at the end
1620 of the buffer, so that buffer overflows will cause immediate chaos. */
1621 static _cpp_buff *
1622 new_buff (size_t len)
1624 _cpp_buff *result;
1625 unsigned char *base;
1627 if (len < MIN_BUFF_SIZE)
1628 len = MIN_BUFF_SIZE;
1629 len = CPP_ALIGN (len);
1631 base = xmalloc (len + sizeof (_cpp_buff));
1632 result = (_cpp_buff *) (base + len);
1633 result->base = base;
1634 result->cur = base;
1635 result->limit = base + len;
1636 result->next = NULL;
1637 return result;
1640 /* Place a chain of unwanted allocation buffers on the free list. */
1641 void
1642 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1644 _cpp_buff *end = buff;
1646 while (end->next)
1647 end = end->next;
1648 end->next = pfile->free_buffs;
1649 pfile->free_buffs = buff;
1652 /* Return a free buffer of size at least MIN_SIZE. */
1653 _cpp_buff *
1654 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1656 _cpp_buff *result, **p;
1658 for (p = &pfile->free_buffs;; p = &(*p)->next)
1660 size_t size;
1662 if (*p == NULL)
1663 return new_buff (min_size);
1664 result = *p;
1665 size = result->limit - result->base;
1666 /* Return a buffer that's big enough, but don't waste one that's
1667 way too big. */
1668 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1669 break;
1672 *p = result->next;
1673 result->next = NULL;
1674 result->cur = result->base;
1675 return result;
1678 /* Creates a new buffer with enough space to hold the uncommitted
1679 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1680 the excess bytes to the new buffer. Chains the new buffer after
1681 BUFF, and returns the new buffer. */
1682 _cpp_buff *
1683 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1685 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1686 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1688 buff->next = new_buff;
1689 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1690 return new_buff;
1693 /* Creates a new buffer with enough space to hold the uncommitted
1694 remaining bytes of the buffer pointed to by BUFF, and at least
1695 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1696 Chains the new buffer before the buffer pointed to by BUFF, and
1697 updates the pointer to point to the new buffer. */
1698 void
1699 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1701 _cpp_buff *new_buff, *old_buff = *pbuff;
1702 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1704 new_buff = _cpp_get_buff (pfile, size);
1705 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1706 new_buff->next = old_buff;
1707 *pbuff = new_buff;
1710 /* Free a chain of buffers starting at BUFF. */
1711 void
1712 _cpp_free_buff (buff)
1713 _cpp_buff *buff;
1715 _cpp_buff *next;
1717 for (; buff; buff = next)
1719 next = buff->next;
1720 free (buff->base);
1724 /* Allocate permanent, unaligned storage of length LEN. */
1725 unsigned char *
1726 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1728 _cpp_buff *buff = pfile->u_buff;
1729 unsigned char *result = buff->cur;
1731 if (len > (size_t) (buff->limit - result))
1733 buff = _cpp_get_buff (pfile, len);
1734 buff->next = pfile->u_buff;
1735 pfile->u_buff = buff;
1736 result = buff->cur;
1739 buff->cur = result + len;
1740 return result;
1743 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1744 That buffer is used for growing allocations when saving macro
1745 replacement lists in a #define, and when parsing an answer to an
1746 assertion in #assert, #unassert or #if (and therefore possibly
1747 whilst expanding macros). It therefore must not be used by any
1748 code that they might call: specifically the lexer and the guts of
1749 the macro expander.
1751 All existing other uses clearly fit this restriction: storing
1752 registered pragmas during initialization. */
1753 unsigned char *
1754 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1756 _cpp_buff *buff = pfile->a_buff;
1757 unsigned char *result = buff->cur;
1759 if (len > (size_t) (buff->limit - result))
1761 buff = _cpp_get_buff (pfile, len);
1762 buff->next = pfile->a_buff;
1763 pfile->a_buff = buff;
1764 result = buff->cur;
1767 buff->cur = result + len;
1768 return result;