update email addresses
[official-gcc.git] / gcc / cpplex.c
blob5f3e3f62fcb5ee617482893296cf624e2050a1ad
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
29 enum spell_type
31 SPELL_OPERATOR = 0,
32 SPELL_IDENT,
33 SPELL_LITERAL,
34 SPELL_NONE
37 struct token_spelling
39 enum spell_type category;
40 const unsigned char *name;
43 static const unsigned char *const digraph_spellings[] =
44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U STRINGX (e) },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
49 #undef OP
50 #undef TK
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of file-local helpers.  Each parameter list is
   wrapped in PARAMS, which is defined outside this file (presumably
   so that the prototypes can be hidden from pre-ISO compilers --
   confirm against the definition).  */
55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
56 static int skip_line_comment PARAMS ((cpp_reader *));
57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
63 cppchar_t));
64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
65 unsigned int, enum cpp_ttype));
66 static bool warn_in_comment PARAMS ((cpp_reader *, _cpp_line_note *));
67 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
68 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
69 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
71 static unsigned int hex_digit_value PARAMS ((unsigned int));
72 static _cpp_buff *new_buff PARAMS ((size_t));
75 /* Utility routine:
77 Compares, the token TOKEN to the NUL-terminated string STRING.
78 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
79 int
80 cpp_ideq (token, string)
81 const cpp_token *token;
82 const char *string;
84 if (token->type != CPP_NAME)
85 return 0;
87 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
90 /* Record a note TYPE at byte POS into the current cleaned logical
91 line. */
92 static void
93 add_line_note (buffer, pos, type)
94 cpp_buffer *buffer;
95 const uchar *pos;
96 unsigned int type;
98 if (buffer->notes_used == buffer->notes_cap)
100 buffer->notes_cap = buffer->notes_cap * 2 + 200;
101 buffer->notes = (_cpp_line_note *)
102 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
105 buffer->notes[buffer->notes_used].pos = pos;
106 buffer->notes[buffer->notes_used].type = type;
107 buffer->notes_used++;
/* Prepare the next logical line of PFILE's current buffer.

   On entry the line starts at buffer->next_line.  The note list is
   emptied, buffer->cur and buffer->line_base are set to the start of
   the line and buffer->need_line is cleared.  On exit the line is
   terminated by a single '\n', a sentinel note has been recorded just
   past that terminator, and buffer->next_line points after the
   physical line ending that was consumed.

   NOTE(review): the brace-only and "do" lines of this function are
   missing from this copy of the file; the comments below describe the
   statements that are visible.  */
110 /* Returns with a logical line that contains no escaped newlines or
111 trigraphs. This is a time-critical inner loop. */
112 void
113 _cpp_clean_line (pfile)
114 cpp_reader *pfile;
116 cpp_buffer *buffer;
/* S is the read position, D the write position; both are
   pre-incremented, hence the initial "- 1" below.  P is a scratch
   pointer used when looking backwards for a backslash.  */
117 const uchar *s;
118 uchar c, *d, *p;
120 buffer = pfile->buffer;
121 buffer->cur_note = buffer->notes_used = 0;
122 buffer->cur = buffer->line_base = buffer->next_line;
123 buffer->need_line = false;
124 s = buffer->next_line - 1;
/* Buffers not flagged from_stage3 get the in-place rewrite below;
   the others only have their line end located (see the else arm).  */
126 if (!buffer->from_stage3)
128 d = (uchar *) s;
130 for (;;)
/* Copy one character from the read to the write position.  */
132 c = *++s;
133 *++d = c;
135 if (c == '\n' || c == '\r')
137 /* Handle DOS line endings. */
138 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
139 s++;
/* A line end located at rlimit terminates the scan unconditionally.  */
140 if (s == buffer->rlimit)
141 break;
143 /* Escaped? */
/* Walk back over non-vertical whitespace copied so far, stopping at
   buffer->next_line; the line end is escaped only if a backslash
   precedes that whitespace.  */
144 p = d;
145 while (p != buffer->next_line && is_nvspace (p[-1]))
146 p--;
147 if (p == buffer->next_line || p[-1] != '\\')
148 break;
/* Record the splice at the backslash: note type ' ' when whitespace
   separated backslash and line end, '\\' otherwise.  D is then moved
   back so that the next copied character overwrites the backslash.
   buffer->next_line is advanced to the splice point, which bounds the
   backward scan above on later iterations.  */
150 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
151 d = p - 2;
152 buffer->next_line = p - 1;
154 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
156 /* Add a note regardless, for the benefit of -Wtrigraphs. */
/* The note type is the third character of the trigraph.  The
   replacement itself only happens when the trigraphs option is on.  */
157 add_line_note (buffer, d, s[2]);
158 if (CPP_OPTION (pfile, trigraphs))
160 *d = _cpp_trigraph_map[s[2]];
161 s += 2;
/* from_stage3 buffers: advance to the first '\n' or '\r' without
   rewriting anything, and terminate the line there.  */
166 else
169 s++;
170 while (*s != '\n' && *s != '\r');
171 d = (uchar *) s;
173 /* Handle DOS line endings. */
174 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
/* Whatever line ending was found, the cleaned line ends in '\n'.  */
178 *d = '\n';
179 /* A sentinel note that should never be processed. */
180 add_line_note (buffer, d + 1, '\n');
181 buffer->next_line = s + 1;
184 /* Return true if the trigraph indicated by NOTE should be warned
185 about in a comment. */
186 static bool
187 warn_in_comment (pfile, note)
188 cpp_reader *pfile;
189 _cpp_line_note *note;
191 const uchar *p;
193 /* Within comments we don't warn about trigraphs, unless the
194 trigraph forms an escaped newline, as that may change
195 behaviour. */
196 if (note->type != '/')
197 return false;
199 /* If -trigraphs, then this was an escaped newline iff the next note
200 is coincident. */
201 if (CPP_OPTION (pfile, trigraphs))
202 return note[1].pos == note->pos;
204 /* Otherwise, see if this forms an escaped newline. */
205 p = note->pos + 3;
206 while (is_nvspace (*p))
207 p++;
209 /* There might have been escaped newlines between the trigraph and the
210 newline we found. Hence the position test. */
211 return (*p == '\n' && p < note[1].pos);
214 /* Process the notes created by add_line_note as far as the current
215 location. */
216 void
217 _cpp_process_line_notes (pfile, in_comment)
218 cpp_reader *pfile;
219 int in_comment;
221 cpp_buffer *buffer = pfile->buffer;
223 for (;;)
225 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
226 unsigned int col;
228 if (note->pos > buffer->cur)
229 break;
231 buffer->cur_note++;
232 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
234 if (note->type == '\\' || note->type == ' ')
236 if (note->type == ' ' && !in_comment)
237 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
238 "backslash and newline separated by space");
240 if (buffer->next_line > buffer->rlimit)
242 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
243 "backslash-newline at end of file");
244 /* Prevent "no newline at end of file" warning. */
245 buffer->next_line = buffer->rlimit;
248 buffer->line_base = note->pos;
249 pfile->line++;
251 else if (_cpp_trigraph_map[note->type])
253 if (CPP_OPTION (pfile, warn_trigraphs)
254 && (!in_comment || warn_in_comment (pfile, note)))
256 if (CPP_OPTION (pfile, trigraphs))
257 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
258 "trigraph ??%c converted to %c",
259 note->type,
260 (int) _cpp_trigraph_map[note->type]);
261 else
262 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
263 "trigraph ??%c ignored",
264 note->type);
267 else
268 abort ();
272 /* Skip a C-style block comment. We find the end of the comment by
273 seeing if an asterisk is before every '/' we encounter. Returns
274 nonzero if comment terminated by EOF, zero otherwise.
276 Buffer->cur points to the initial asterisk of the comment. */
277 bool
278 _cpp_skip_block_comment (pfile)
279 cpp_reader *pfile;
281 cpp_buffer *buffer = pfile->buffer;
282 cppchar_t c;
284 buffer->cur++;
285 if (*buffer->cur == '/')
286 buffer->cur++;
288 for (;;)
290 c = *buffer->cur++;
292 /* People like decorating comments with '*', so check for '/'
293 instead for efficiency. */
294 if (c == '/')
296 if (buffer->cur[-2] == '*')
297 break;
299 /* Warn about potential nested comments, but not if the '/'
300 comes immediately before the true comment delimiter.
301 Don't bother to get it right across escaped newlines. */
302 if (CPP_OPTION (pfile, warn_comments)
303 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
304 cpp_error_with_line (pfile, DL_WARNING,
305 pfile->line, CPP_BUF_COL (buffer),
306 "\"/*\" within comment");
308 else if (c == '\n')
310 buffer->cur--;
311 _cpp_process_line_notes (pfile, true);
312 if (buffer->next_line >= buffer->rlimit)
313 return true;
314 _cpp_clean_line (pfile);
315 pfile->line++;
319 _cpp_process_line_notes (pfile, true);
320 return false;
323 /* Skip a C++ line comment, leaving buffer->cur pointing to the
324 terminating newline. Handles escaped newlines. Returns nonzero
325 if a multiline comment. */
326 static int
327 skip_line_comment (pfile)
328 cpp_reader *pfile;
330 cpp_buffer *buffer = pfile->buffer;
331 unsigned int orig_line = pfile->line;
333 while (*buffer->cur != '\n')
334 buffer->cur++;
336 _cpp_process_line_notes (pfile, true);
337 return orig_line != pfile->line;
340 /* Skips whitespace, saving the next non-whitespace character. */
341 static void
342 skip_whitespace (pfile, c)
343 cpp_reader *pfile;
344 cppchar_t c;
346 cpp_buffer *buffer = pfile->buffer;
347 bool saw_NUL = false;
351 /* Horizontal space always OK. */
352 if (c == ' ' || c == '\t')
354 /* Just \f \v or \0 left. */
355 else if (c == '\0')
356 saw_NUL = true;
357 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
358 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
359 CPP_BUF_COL (buffer),
360 "%s in preprocessing directive",
361 c == '\f' ? "form feed" : "vertical tab");
363 c = *buffer->cur++;
365 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
366 while (is_nvspace (c));
368 if (saw_NUL)
369 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
371 buffer->cur--;
374 /* See if the characters of a number token are valid in a name (no
375 '.', '+' or '-'). */
376 static int
377 name_p (pfile, string)
378 cpp_reader *pfile;
379 const cpp_string *string;
381 unsigned int i;
383 for (i = 0; i < string->len; i++)
384 if (!is_idchar (string->text[i]))
385 return 0;
387 return 1;
390 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
391 an identifier. FIRST is TRUE if this starts an identifier. */
392 static bool
393 forms_identifier_p (pfile, first)
394 cpp_reader *pfile;
395 int first;
397 cpp_buffer *buffer = pfile->buffer;
399 if (*buffer->cur == '$')
401 if (!CPP_OPTION (pfile, dollars_in_ident))
402 return false;
404 buffer->cur++;
405 if (CPP_PEDANTIC (pfile)
406 && !pfile->state.skipping
407 && !pfile->warned_dollar)
409 pfile->warned_dollar = true;
410 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
413 return true;
416 /* Is this a syntactically valid UCN? */
417 if (0 && *buffer->cur == '\\'
418 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
420 buffer->cur += 2;
421 if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
422 return true;
423 buffer->cur -= 2;
426 return false;
429 /* Lex an identifier starting at BUFFER->CUR - 1. */
430 static cpp_hashnode *
431 lex_identifier (pfile, base)
432 cpp_reader *pfile;
433 const uchar *base;
435 cpp_hashnode *result;
436 const uchar *cur;
440 cur = pfile->buffer->cur;
442 /* N.B. ISIDNUM does not include $. */
443 while (ISIDNUM (*cur))
444 cur++;
446 pfile->buffer->cur = cur;
448 while (forms_identifier_p (pfile, false));
450 result = (cpp_hashnode *)
451 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
453 /* Rarely, identifiers require diagnostics when lexed. */
454 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
455 && !pfile->state.skipping, 0))
457 /* It is allowed to poison the same identifier twice. */
458 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
459 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
460 NODE_NAME (result));
462 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
463 replacement list of a variadic macro. */
464 if (result == pfile->spec_nodes.n__VA_ARGS__
465 && !pfile->state.va_args_ok)
466 cpp_error (pfile, DL_PEDWARN,
467 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
470 return result;
473 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
474 static void
475 lex_number (pfile, number)
476 cpp_reader *pfile;
477 cpp_string *number;
479 const uchar *cur;
480 const uchar *base;
481 uchar *dest;
483 base = pfile->buffer->cur - 1;
486 cur = pfile->buffer->cur;
488 /* N.B. ISIDNUM does not include $. */
489 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
490 cur++;
492 pfile->buffer->cur = cur;
494 while (forms_identifier_p (pfile, false));
496 number->len = cur - base;
497 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
498 memcpy (dest, base, number->len);
499 dest[number->len] = '\0';
500 number->text = dest;
503 /* Create a token of type TYPE with a literal spelling. */
504 static void
505 create_literal (pfile, token, base, len, type)
506 cpp_reader *pfile;
507 cpp_token *token;
508 const uchar *base;
509 unsigned int len;
510 enum cpp_ttype type;
512 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
514 memcpy (dest, base, len);
515 dest[len] = '\0';
516 token->type = type;
517 token->val.str.len = len;
518 token->val.str.text = dest;
521 /* Lexes a string, character constant, or angle-bracketed header file
522 name. The stored string contains the spelling, including opening
523 quote and leading any leading 'L'. It returns the type of the
524 literal, or CPP_OTHER if it was not properly terminated.
526 The spelling is NUL-terminated, but it is not guaranteed that this
527 is the first NUL since embedded NULs are preserved. */
528 static void
529 lex_string (pfile, token, base)
530 cpp_reader *pfile;
531 cpp_token *token;
532 const uchar *base;
534 bool saw_NUL = false;
535 const uchar *cur;
536 cppchar_t terminator;
537 enum cpp_ttype type;
539 cur = base;
540 terminator = *cur++;
541 if (terminator == 'L')
542 terminator = *cur++;
543 if (terminator == '\"')
544 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
545 else if (terminator == '\'')
546 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
547 else
548 terminator = '>', type = CPP_HEADER_NAME;
550 for (;;)
552 cppchar_t c = *cur++;
554 /* In #include-style directives, terminators are not escapable. */
555 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
556 cur++;
557 else if (c == terminator)
558 break;
559 else if (c == '\n')
561 cur--;
562 type = CPP_OTHER;
563 break;
565 else if (c == '\0')
566 saw_NUL = true;
569 if (saw_NUL && !pfile->state.skipping)
570 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
572 pfile->buffer->cur = cur;
573 create_literal (pfile, token, base, cur - base, type);
576 /* The stored comment includes the comment start and any terminator. */
577 static void
578 save_comment (pfile, token, from, type)
579 cpp_reader *pfile;
580 cpp_token *token;
581 const unsigned char *from;
582 cppchar_t type;
584 unsigned char *buffer;
585 unsigned int len, clen;
587 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
589 /* C++ comments probably (not definitely) have moved past a new
590 line, which we don't want to save in the comment. */
591 if (is_vspace (pfile->buffer->cur[-1]))
592 len--;
594 /* If we are currently in a directive, then we need to store all
595 C++ comments as C comments internally, and so we need to
596 allocate a little extra space in that case.
598 Note that the only time we encounter a directive here is
599 when we are saving comments in a "#define". */
600 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
602 buffer = _cpp_unaligned_alloc (pfile, clen);
604 token->type = CPP_COMMENT;
605 token->val.str.len = clen;
606 token->val.str.text = buffer;
608 buffer[0] = '/';
609 memcpy (buffer + 1, from, len - 1);
611 /* Finish conversion to a C comment, if necessary. */
612 if (pfile->state.in_directive && type == '/')
614 buffer[1] = '*';
615 buffer[clen - 2] = '*';
616 buffer[clen - 1] = '/';
620 /* Allocate COUNT tokens for RUN. */
621 void
622 _cpp_init_tokenrun (run, count)
623 tokenrun *run;
624 unsigned int count;
626 run->base = xnewvec (cpp_token, count);
627 run->limit = run->base + count;
628 run->next = NULL;
631 /* Returns the next tokenrun, or creates one if there is none. */
632 static tokenrun *
633 next_tokenrun (run)
634 tokenrun *run;
636 if (run->next == NULL)
638 run->next = xnew (tokenrun);
639 run->next->prev = run;
640 _cpp_init_tokenrun (run->next, 250);
643 return run->next;
646 /* Allocate a single token that is invalidated at the same time as the
647 rest of the tokens on the line. Has its line and col set to the
648 same as the last lexed token, so that diagnostics appear in the
649 right place. */
650 cpp_token *
651 _cpp_temp_token (pfile)
652 cpp_reader *pfile;
654 cpp_token *old, *result;
656 old = pfile->cur_token - 1;
657 if (pfile->cur_token == pfile->cur_run->limit)
659 pfile->cur_run = next_tokenrun (pfile->cur_run);
660 pfile->cur_token = pfile->cur_run->base;
663 result = pfile->cur_token++;
664 result->line = old->line;
665 result->col = old->col;
666 return result;
669 /* Lex a token into RESULT (external interface). Takes care of issues
670 like directive handling, token lookahead, multiple include
671 optimization and skipping. */
672 const cpp_token *
673 _cpp_lex_token (pfile)
674 cpp_reader *pfile;
676 cpp_token *result;
678 for (;;)
680 if (pfile->cur_token == pfile->cur_run->limit)
682 pfile->cur_run = next_tokenrun (pfile->cur_run);
683 pfile->cur_token = pfile->cur_run->base;
686 if (pfile->lookaheads)
688 pfile->lookaheads--;
689 result = pfile->cur_token++;
691 else
692 result = _cpp_lex_direct (pfile);
694 if (result->flags & BOL)
696 /* Is this a directive. If _cpp_handle_directive returns
697 false, it is an assembler #. */
698 if (result->type == CPP_HASH
699 /* 6.10.3 p 11: Directives in a list of macro arguments
700 gives undefined behavior. This implementation
701 handles the directive as normal. */
702 && pfile->state.parsing_args != 1
703 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
704 continue;
705 if (pfile->cb.line_change && !pfile->state.skipping)
706 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
709 /* We don't skip tokens in directives. */
710 if (pfile->state.in_directive)
711 break;
713 /* Outside a directive, invalidate controlling macros. At file
714 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
715 get here and MI optimisation works. */
716 pfile->mi_valid = false;
718 if (!pfile->state.skipping || result->type == CPP_EOF)
719 break;
722 return result;
725 /* Returns true if a fresh line has been loaded. */
726 bool
727 _cpp_get_fresh_line (pfile)
728 cpp_reader *pfile;
730 /* We can't get a new line until we leave the current directive. */
731 if (pfile->state.in_directive)
732 return false;
734 for (;;)
736 cpp_buffer *buffer = pfile->buffer;
738 if (!buffer->need_line)
739 return true;
741 if (buffer->next_line < buffer->rlimit)
743 _cpp_clean_line (pfile);
744 return true;
747 /* First, get out of parsing arguments state. */
748 if (pfile->state.parsing_args)
749 return false;
751 /* End of buffer. Non-empty files should end in a newline. */
752 if (buffer->buf != buffer->rlimit
753 && buffer->next_line > buffer->rlimit
754 && !buffer->from_stage3)
756 /* Only warn once. */
757 buffer->next_line = buffer->rlimit;
758 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
759 CPP_BUF_COLUMN (buffer, buffer->cur),
760 "no newline at end of file");
763 if (!buffer->prev)
764 return false;
766 if (buffer->return_at_eof)
768 _cpp_pop_buffer (pfile);
769 return false;
772 _cpp_pop_buffer (pfile);
/* Helper for _cpp_lex_direct; uses its locals RESULT and BUFFER.
   Gives the token the type THEN_TYPE and consumes one character if
   the next character is CHAR; gives it ELSE_TYPE otherwise.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	buffer->cur++, result->type = (THEN_TYPE);	\
    }							\
  while (0)
/* Core lexer: produce exactly one token from the current buffer.

   Outline of the visible control flow:
   - a token slot is reserved from pfile->cur_token;
   - at "fresh_line", a new line is requested if the buffer needs
     one; if none can be had the token becomes CPP_EOF;
   - at "update_tokens_line" the token's line number is (re)taken
     from pfile->line;
   - at "skipped_white" pending line notes are processed, one
     character is consumed and dispatched on in the switch.

   NOTE(review): brace-only lines are missing from this copy of the
   file, so the nesting has to be read from the statements alone.  */
785 /* Lex a token into pfile->cur_token, which is also incremented, to
786 get diagnostics pointing to the correct location.
788 Does not handle issues such as token lookahead, multiple-include
789 optimisation, directives, skipping etc. This function is only
790 suitable for use by _cpp_lex_token, and in special cases like
791 lex_expansion_token which doesn't care for any of these issues.
793 When meeting a newline, returns CPP_EOF if parsing a directive,
794 otherwise returns to the start of the token buffer if permissible.
795 Returns the location of the lexed token. */
796 cpp_token *
797 _cpp_lex_direct (pfile)
798 cpp_reader *pfile;
800 cppchar_t c;
801 cpp_buffer *buffer;
802 const unsigned char *comment_start;
803 cpp_token *result = pfile->cur_token++;
805 fresh_line:
806 result->flags = 0;
807 if (pfile->buffer->need_line)
/* No line can be supplied: hand back CPP_EOF.  Outside a directive
   the EOF token also gets a line number and the BOL flag.  */
809 if (!_cpp_get_fresh_line (pfile))
811 result->type = CPP_EOF;
812 if (!pfile->state.in_directive)
814 /* Tell the compiler the line number of the EOF token. */
815 result->line = pfile->line;
816 result->flags = BOL;
818 return result;
/* Unless tokens are being kept, start over at the first slot of the
   base run, so RESULT is re-pointed as well.  */
820 if (!pfile->keep_tokens)
822 pfile->cur_run = &pfile->base_run;
823 result = pfile->base_run.base;
824 pfile->cur_token = result + 1;
/* The first token of a fresh line is flagged BOL; when
   state.parsing_args is 2 it is flagged PREV_WHITE too.  */
826 result->flags = BOL;
827 if (pfile->state.parsing_args == 2)
828 result->flags |= PREV_WHITE;
830 buffer = pfile->buffer;
831 update_tokens_line:
832 result->line = pfile->line;
834 skipped_white:
/* Once the read position has reached the next pending note, process
   the notes (skipped while pfile->overlaid_buffer is set) and pick up
   any change they made to pfile->line.  */
835 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
836 && !pfile->overlaid_buffer)
838 _cpp_process_line_notes (pfile, false);
839 result->line = pfile->line;
/* Consume one character; the column is computed from the position
   after that character.  */
841 c = *buffer->cur++;
842 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
844 switch (c)
846 case ' ': case '\t': case '\f': case '\v': case '\0':
847 result->flags |= PREV_WHITE;
848 skip_whitespace (pfile, c);
849 goto skipped_white;
/* End of the logical line: count it and start again on a new line.  */
851 case '\n':
852 pfile->line++;
853 buffer->need_line = true;
854 goto fresh_line;
856 case '0': case '1': case '2': case '3': case '4':
857 case '5': case '6': case '7': case '8': case '9':
858 result->type = CPP_NUMBER;
859 lex_number (pfile, &result->val.str);
860 break;
862 case 'L':
863 /* 'L' may introduce wide characters or strings. */
864 if (*buffer->cur == '\'' || *buffer->cur == '"')
866 lex_string (pfile, result, buffer->cur - 1);
867 break;
869 /* Fall through. */
871 case '_':
872 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
873 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
874 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
875 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
876 case 'y': case 'z':
877 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
878 case 'G': case 'H': case 'I': case 'J': case 'K':
879 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
880 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
881 case 'Y': case 'Z':
882 result->type = CPP_NAME;
883 result->val.node = lex_identifier (pfile, buffer->cur - 1);
885 /* Convert named operators to their proper types. */
/* val.node is left in place, and the NAMED_OP flag lets the spelling
   routines recover the identifier.  */
886 if (result->val.node->flags & NODE_OPERATOR)
888 result->flags |= NAMED_OP;
889 result->type = result->val.node->directive_index;
891 break;
893 case '\'':
894 case '"':
895 lex_string (pfile, result, buffer->cur - 1);
896 break;
898 case '/':
899 /* A potential block or line comment. */
/* comment_start points at the character after the '/'.  The two
   operator arms below break out of the switch; the two comment arms
   fall out of the if-chain to the save_comments test.  */
900 comment_start = buffer->cur;
901 c = *buffer->cur;
903 if (c == '*')
905 if (_cpp_skip_block_comment (pfile))
906 cpp_error (pfile, DL_ERROR, "unterminated comment");
908 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
909 || CPP_IN_SYSTEM_HEADER (pfile)))
911 /* Warn about comments only if pedantically GNUC89, and not
912 in system headers. */
913 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
914 && ! buffer->warned_cplusplus_comments)
916 cpp_error (pfile, DL_PEDWARN,
917 "C++ style comments are not allowed in ISO C90");
918 cpp_error (pfile, DL_PEDWARN,
919 "(this will be reported only once per input file)");
920 buffer->warned_cplusplus_comments = 1;
923 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
924 cpp_error (pfile, DL_WARNING, "multi-line comment");
926 else if (c == '=')
928 buffer->cur++;
929 result->type = CPP_DIV_EQ;
930 break;
932 else
934 result->type = CPP_DIV;
935 break;
/* A comment was skipped.  Unless comments are being saved it only
   counts as white space before the next token.  */
938 if (!pfile->state.save_comments)
940 result->flags |= PREV_WHITE;
941 goto update_tokens_line;
944 /* Save the comment as a token in its own right. */
945 save_comment (pfile, result, comment_start, c);
946 break;
948 case '<':
/* With state.angled_headers set, '<' opens a header name.  */
949 if (pfile->state.angled_headers)
951 lex_string (pfile, result, buffer->cur - 1);
952 break;
955 result->type = CPP_LESS;
956 if (*buffer->cur == '=')
957 buffer->cur++, result->type = CPP_LESS_EQ;
958 else if (*buffer->cur == '<')
960 buffer->cur++;
961 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
963 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
965 buffer->cur++;
966 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
968 else if (CPP_OPTION (pfile, digraphs))
970 if (*buffer->cur == ':')
972 buffer->cur++;
973 result->flags |= DIGRAPH;
974 result->type = CPP_OPEN_SQUARE;
976 else if (*buffer->cur == '%')
978 buffer->cur++;
979 result->flags |= DIGRAPH;
980 result->type = CPP_OPEN_BRACE;
983 break;
985 case '>':
986 result->type = CPP_GREATER;
987 if (*buffer->cur == '=')
988 buffer->cur++, result->type = CPP_GREATER_EQ;
989 else if (*buffer->cur == '>')
991 buffer->cur++;
992 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
994 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
996 buffer->cur++;
997 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
999 break;
1001 case '%':
1002 result->type = CPP_MOD;
1003 if (*buffer->cur == '=')
1004 buffer->cur++, result->type = CPP_MOD_EQ;
1005 else if (CPP_OPTION (pfile, digraphs))
1007 if (*buffer->cur == ':')
1009 buffer->cur++;
1010 result->flags |= DIGRAPH;
1011 result->type = CPP_HASH;
1012 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1013 buffer->cur += 2, result->type = CPP_PASTE;
1015 else if (*buffer->cur == '>')
1017 buffer->cur++;
1018 result->flags |= DIGRAPH;
1019 result->type = CPP_CLOSE_BRACE;
1022 break;
1024 case '.':
1025 result->type = CPP_DOT;
/* A '.' followed by a digit starts a number.  */
1026 if (ISDIGIT (*buffer->cur))
1028 result->type = CPP_NUMBER;
1029 lex_number (pfile, &result->val.str);
1031 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1032 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1033 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1034 buffer->cur++, result->type = CPP_DOT_STAR;
1035 break;
1037 case '+':
1038 result->type = CPP_PLUS;
1039 if (*buffer->cur == '+')
1040 buffer->cur++, result->type = CPP_PLUS_PLUS;
1041 else if (*buffer->cur == '=')
1042 buffer->cur++, result->type = CPP_PLUS_EQ;
1043 break;
1045 case '-':
1046 result->type = CPP_MINUS;
1047 if (*buffer->cur == '>')
1049 buffer->cur++;
1050 result->type = CPP_DEREF;
1051 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1052 buffer->cur++, result->type = CPP_DEREF_STAR;
1054 else if (*buffer->cur == '-')
1055 buffer->cur++, result->type = CPP_MINUS_MINUS;
1056 else if (*buffer->cur == '=')
1057 buffer->cur++, result->type = CPP_MINUS_EQ;
1058 break;
1060 case '&':
1061 result->type = CPP_AND;
1062 if (*buffer->cur == '&')
1063 buffer->cur++, result->type = CPP_AND_AND;
1064 else if (*buffer->cur == '=')
1065 buffer->cur++, result->type = CPP_AND_EQ;
1066 break;
1068 case '|':
1069 result->type = CPP_OR;
1070 if (*buffer->cur == '|')
1071 buffer->cur++, result->type = CPP_OR_OR;
1072 else if (*buffer->cur == '=')
1073 buffer->cur++, result->type = CPP_OR_EQ;
1074 break;
1076 case ':':
1077 result->type = CPP_COLON;
1078 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1079 buffer->cur++, result->type = CPP_SCOPE;
1080 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1082 buffer->cur++;
1083 result->flags |= DIGRAPH;
1084 result->type = CPP_CLOSE_SQUARE;
1086 break;
1088 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1089 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1090 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1091 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1092 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1094 case '?': result->type = CPP_QUERY; break;
1095 case '~': result->type = CPP_COMPL; break;
1096 case ',': result->type = CPP_COMMA; break;
1097 case '(': result->type = CPP_OPEN_PAREN; break;
1098 case ')': result->type = CPP_CLOSE_PAREN; break;
1099 case '[': result->type = CPP_OPEN_SQUARE; break;
1100 case ']': result->type = CPP_CLOSE_SQUARE; break;
1101 case '{': result->type = CPP_OPEN_BRACE; break;
1102 case '}': result->type = CPP_CLOSE_BRACE; break;
1103 case ';': result->type = CPP_SEMICOLON; break;
1105 /* @ is a punctuator in Objective-C. */
1106 case '@': result->type = CPP_ATSIGN; break;
/* '$' and '\\' may start an identifier.  The character is un-read so
   that forms_identifier_p can inspect it.  If it is rejected, the
   character is consumed again and control falls into the default
   case.  */
1108 case '$':
1109 case '\\':
1111 const uchar *base = --buffer->cur;
1113 if (forms_identifier_p (pfile, true))
1115 result->type = CPP_NAME;
1116 result->val.node = lex_identifier (pfile, base);
1117 break;
1119 buffer->cur++;
/* Any other character becomes a one-character CPP_OTHER token.  */
1122 default:
1123 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1124 break;
1127 return result;
1130 /* An upper bound on the number of bytes needed to spell TOKEN.
1131 Does not include preceding whitespace. */
1132 unsigned int
1133 cpp_token_len (token)
1134 const cpp_token *token;
1136 unsigned int len;
1138 switch (TOKEN_SPELL (token))
1140 default: len = 4; break;
1141 case SPELL_LITERAL: len = token->val.str.len; break;
1142 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1145 return len;
1148 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1149 already contain the enough space to hold the token's spelling.
1150 Returns a pointer to the character after the last character
1151 written. */
1152 unsigned char *
1153 cpp_spell_token (pfile, token, buffer)
1154 cpp_reader *pfile; /* Would be nice to be rid of this... */
1155 const cpp_token *token;
1156 unsigned char *buffer;
1158 switch (TOKEN_SPELL (token))
1160 case SPELL_OPERATOR:
1162 const unsigned char *spelling;
1163 unsigned char c;
1165 if (token->flags & DIGRAPH)
1166 spelling
1167 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1168 else if (token->flags & NAMED_OP)
1169 goto spell_ident;
1170 else
1171 spelling = TOKEN_NAME (token);
1173 while ((c = *spelling++) != '\0')
1174 *buffer++ = c;
1176 break;
1178 spell_ident:
1179 case SPELL_IDENT:
1180 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1181 buffer += NODE_LEN (token->val.node);
1182 break;
1184 case SPELL_LITERAL:
1185 memcpy (buffer, token->val.str.text, token->val.str.len);
1186 buffer += token->val.str.len;
1187 break;
1189 case SPELL_NONE:
1190 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1191 break;
1194 return buffer;
1197 /* Returns TOKEN spelt as a null-terminated string. The string is
1198 freed when the reader is destroyed. Useful for diagnostics. */
1199 unsigned char *
1200 cpp_token_as_text (pfile, token)
1201 cpp_reader *pfile;
1202 const cpp_token *token;
1204 unsigned int len = cpp_token_len (token) + 1;
1205 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1207 end = cpp_spell_token (pfile, token, start);
1208 end[0] = '\0';
1210 return start;
1213 /* Used by C front ends, which really should move to using
1214 cpp_token_as_text. */
1215 const char *
1216 cpp_type2name (type)
1217 enum cpp_ttype type;
1219 return (const char *) token_spellings[type].name;
1222 /* Writes the spelling of token to FP, without any preceding space.
1223 Separated from cpp_spell_token for efficiency - to avoid stdio
1224 double-buffering. */
1225 void
1226 cpp_output_token (token, fp)
1227 const cpp_token *token;
1228 FILE *fp;
1230 switch (TOKEN_SPELL (token))
1232 case SPELL_OPERATOR:
1234 const unsigned char *spelling;
1235 int c;
1237 if (token->flags & DIGRAPH)
1238 spelling
1239 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1240 else if (token->flags & NAMED_OP)
1241 goto spell_ident;
1242 else
1243 spelling = TOKEN_NAME (token);
1245 c = *spelling;
1247 putc (c, fp);
1248 while ((c = *++spelling) != '\0');
1250 break;
1252 spell_ident:
1253 case SPELL_IDENT:
1254 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1255 break;
1257 case SPELL_LITERAL:
1258 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1259 break;
1261 case SPELL_NONE:
1262 /* An error, most probably. */
1263 break;
1267 /* Compare two tokens. */
1269 _cpp_equiv_tokens (a, b)
1270 const cpp_token *a, *b;
1272 if (a->type == b->type && a->flags == b->flags)
1273 switch (TOKEN_SPELL (a))
1275 default: /* Keep compiler happy. */
1276 case SPELL_OPERATOR:
1277 return 1;
1278 case SPELL_NONE:
1279 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1280 case SPELL_IDENT:
1281 return a->val.node == b->val.node;
1282 case SPELL_LITERAL:
1283 return (a->val.str.len == b->val.str.len
1284 && !memcmp (a->val.str.text, b->val.str.text,
1285 a->val.str.len));
1288 return 0;
1291 /* Returns nonzero if a space should be inserted to avoid an
1292 accidental token paste for output. For simplicity, it is
1293 conservative, and occasionally advises a space where one is not
1294 needed, e.g. "." and ".2". */
1296 cpp_avoid_paste (pfile, token1, token2)
1297 cpp_reader *pfile;
1298 const cpp_token *token1, *token2;
1300 enum cpp_ttype a = token1->type, b = token2->type;
1301 cppchar_t c;
1303 if (token1->flags & NAMED_OP)
1304 a = CPP_NAME;
1305 if (token2->flags & NAMED_OP)
1306 b = CPP_NAME;
1308 c = EOF;
1309 if (token2->flags & DIGRAPH)
1310 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1311 else if (token_spellings[b].category == SPELL_OPERATOR)
1312 c = token_spellings[b].name[0];
1314 /* Quickly get everything that can paste with an '='. */
1315 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1316 return 1;
1318 switch (a)
1320 case CPP_GREATER: return c == '>' || c == '?';
1321 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1322 case CPP_PLUS: return c == '+';
1323 case CPP_MINUS: return c == '-' || c == '>';
1324 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1325 case CPP_MOD: return c == ':' || c == '>';
1326 case CPP_AND: return c == '&';
1327 case CPP_OR: return c == '|';
1328 case CPP_COLON: return c == ':' || c == '>';
1329 case CPP_DEREF: return c == '*';
1330 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1331 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1332 case CPP_NAME: return ((b == CPP_NUMBER
1333 && name_p (pfile, &token2->val.str))
1334 || b == CPP_NAME
1335 || b == CPP_CHAR || b == CPP_STRING); /* L */
1336 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1337 || c == '.' || c == '+' || c == '-');
1338 /* UCNs */
1339 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1340 && b == CPP_NAME)
1341 || (CPP_OPTION (pfile, objc)
1342 && token1->val.str.text[0] == '@'
1343 && (b == CPP_NAME || b == CPP_STRING)));
1344 default: break;
1347 return 0;
1350 /* Output all the remaining tokens on the current line, and a newline
1351 character, to FP. Leading whitespace is removed. If there are
1352 macros, special token padding is not performed. */
1353 void
1354 cpp_output_line (pfile, fp)
1355 cpp_reader *pfile;
1356 FILE *fp;
1358 const cpp_token *token;
1360 token = cpp_get_token (pfile);
1361 while (token->type != CPP_EOF)
1363 cpp_output_token (token, fp);
1364 token = cpp_get_token (pfile);
1365 if (token->flags & PREV_WHITE)
1366 putc (' ', fp);
1369 putc ('\n', fp);
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1383 /* Read a possible universal character name starting at *PSTR. */
1384 static cppchar_t
1385 maybe_read_ucn (pfile, pstr)
1386 cpp_reader *pfile;
1387 const uchar **pstr;
1389 cppchar_t result, c = (*pstr)[-1];
1391 result = _cpp_valid_ucn (pfile, pstr, false);
1392 if (result)
1394 if (CPP_WTRADITIONAL (pfile))
1395 cpp_error (pfile, DL_WARNING,
1396 "the meaning of '\\%c' is different in traditional C",
1397 (int) c);
1399 if (CPP_OPTION (pfile, EBCDIC))
1401 cpp_error (pfile, DL_ERROR,
1402 "universal character with an EBCDIC target");
1403 result = 0x3f; /* EBCDIC invalid character */
1407 return result;
1410 /* Returns the value of an escape sequence, truncated to the correct
1411 target precision. PSTR points to the input pointer, which is just
1412 after the backslash. LIMIT is how much text we have. WIDE is true
1413 if the escape sequence is part of a wide character constant or
1414 string literal. Handles all relevant diagnostics. */
1415 cppchar_t
1416 cpp_parse_escape (pfile, pstr, limit, wide)
1417 cpp_reader *pfile;
1418 const unsigned char **pstr;
1419 const unsigned char *limit;
1420 int wide;
1422 /* Values of \a \b \e \f \n \r \t \v respectively. */
1423 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1424 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1426 int unknown = 0;
1427 const unsigned char *str = *pstr, *charconsts;
1428 cppchar_t c, ucn, mask;
1429 unsigned int width;
1431 if (CPP_OPTION (pfile, EBCDIC))
1432 charconsts = ebcdic;
1433 else
1434 charconsts = ascii;
1436 if (wide)
1437 width = CPP_OPTION (pfile, wchar_precision);
1438 else
1439 width = CPP_OPTION (pfile, char_precision);
1440 if (width < BITS_PER_CPPCHAR_T)
1441 mask = ((cppchar_t) 1 << width) - 1;
1442 else
1443 mask = ~0;
1445 c = *str++;
1446 switch (c)
1448 case '\\': case '\'': case '"': case '?': break;
1449 case 'b': c = charconsts[1]; break;
1450 case 'f': c = charconsts[3]; break;
1451 case 'n': c = charconsts[4]; break;
1452 case 'r': c = charconsts[5]; break;
1453 case 't': c = charconsts[6]; break;
1454 case 'v': c = charconsts[7]; break;
1456 case '(': case '{': case '[': case '%':
1457 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1458 '\%' is used to prevent SCCS from getting confused. */
1459 unknown = CPP_PEDANTIC (pfile);
1460 break;
1462 case 'a':
1463 if (CPP_WTRADITIONAL (pfile))
1464 cpp_error (pfile, DL_WARNING,
1465 "the meaning of '\\a' is different in traditional C");
1466 c = charconsts[0];
1467 break;
1469 case 'e': case 'E':
1470 if (CPP_PEDANTIC (pfile))
1471 cpp_error (pfile, DL_PEDWARN,
1472 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1473 c = charconsts[2];
1474 break;
1476 case 'u': case 'U':
1477 ucn = maybe_read_ucn (pfile, &str);
1478 if (ucn)
1479 c = ucn;
1480 else
1481 unknown = true;
1482 break;
1484 case 'x':
1485 if (CPP_WTRADITIONAL (pfile))
1486 cpp_error (pfile, DL_WARNING,
1487 "the meaning of '\\x' is different in traditional C");
1490 cppchar_t i = 0, overflow = 0;
1491 int digits_found = 0;
1493 while (str < limit)
1495 c = *str;
1496 if (! ISXDIGIT (c))
1497 break;
1498 str++;
1499 overflow |= i ^ (i << 4 >> 4);
1500 i = (i << 4) + hex_digit_value (c);
1501 digits_found = 1;
1504 if (!digits_found)
1505 cpp_error (pfile, DL_ERROR,
1506 "\\x used with no following hex digits");
1508 if (overflow | (i != (i & mask)))
1510 cpp_error (pfile, DL_PEDWARN,
1511 "hex escape sequence out of range");
1512 i &= mask;
1514 c = i;
1516 break;
1518 case '0': case '1': case '2': case '3':
1519 case '4': case '5': case '6': case '7':
1521 size_t count = 0;
1522 cppchar_t i = c - '0';
1524 while (str < limit && ++count < 3)
1526 c = *str;
1527 if (c < '0' || c > '7')
1528 break;
1529 str++;
1530 i = (i << 3) + c - '0';
1533 if (i != (i & mask))
1535 cpp_error (pfile, DL_PEDWARN,
1536 "octal escape sequence out of range");
1537 i &= mask;
1539 c = i;
1541 break;
1543 default:
1544 unknown = 1;
1545 break;
1548 if (unknown)
1550 if (ISGRAPH (c))
1551 cpp_error (pfile, DL_PEDWARN,
1552 "unknown escape sequence '\\%c'", (int) c);
1553 else
1554 cpp_error (pfile, DL_PEDWARN,
1555 "unknown escape sequence: '\\%03o'", (int) c);
1558 if (c > mask)
1560 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1561 c &= mask;
1564 *pstr = str;
1565 return c;
1568 /* Interpret a (possibly wide) character constant in TOKEN.
1569 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1570 points to a variable that is filled in with the number of
1571 characters seen, and UNSIGNEDP to a variable that indicates whether
1572 the result has signed type. */
1573 cppchar_t
1574 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1575 cpp_reader *pfile;
1576 const cpp_token *token;
1577 unsigned int *pchars_seen;
1578 int *unsignedp;
1580 const unsigned char *str, *limit;
1581 unsigned int chars_seen = 0;
1582 size_t width, max_chars;
1583 cppchar_t c, mask, result = 0;
1584 bool unsigned_p;
1586 str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1587 limit = token->val.str.text + token->val.str.len - 1;
1589 if (token->type == CPP_CHAR)
1591 width = CPP_OPTION (pfile, char_precision);
1592 max_chars = CPP_OPTION (pfile, int_precision) / width;
1593 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1595 else
1597 width = CPP_OPTION (pfile, wchar_precision);
1598 max_chars = 1;
1599 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1602 if (width < BITS_PER_CPPCHAR_T)
1603 mask = ((cppchar_t) 1 << width) - 1;
1604 else
1605 mask = ~0;
1607 while (str < limit)
1609 c = *str++;
1611 if (c == '\\')
1612 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1614 #ifdef MAP_CHARACTER
1615 if (ISPRINT (c))
1616 c = MAP_CHARACTER (c);
1617 #endif
1619 chars_seen++;
1621 /* Truncate the character, scale the result and merge the two. */
1622 c &= mask;
1623 if (width < BITS_PER_CPPCHAR_T)
1624 result = (result << width) | c;
1625 else
1626 result = c;
1629 if (chars_seen == 0)
1630 cpp_error (pfile, DL_ERROR, "empty character constant");
1631 else if (chars_seen > 1)
1633 /* Multichar charconsts are of type int and therefore signed. */
1634 unsigned_p = 0;
1636 if (chars_seen > max_chars)
1638 chars_seen = max_chars;
1639 cpp_error (pfile, DL_WARNING,
1640 "character constant too long for its type");
1642 else if (CPP_OPTION (pfile, warn_multichar))
1643 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1646 /* Sign-extend or truncate the constant to cppchar_t. The value is
1647 in WIDTH bits, but for multi-char charconsts it's value is the
1648 full target type's width. */
1649 if (chars_seen > 1)
1650 width *= max_chars;
1651 if (width < BITS_PER_CPPCHAR_T)
1653 mask = ((cppchar_t) 1 << width) - 1;
1654 if (unsigned_p || !(result & (1 << (width - 1))))
1655 result &= mask;
1656 else
1657 result |= ~mask;
1660 *pchars_seen = chars_seen;
1661 *unsignedp = unsigned_p;
1662 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest buffer considered
   acceptable for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   pointer to its limit.  Every macro argument is parenthesized so
   that expression arguments expand correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1680 /* Create a new allocation buffer. Place the control block at the end
1681 of the buffer, so that buffer overflows will cause immediate chaos. */
1682 static _cpp_buff *
1683 new_buff (len)
1684 size_t len;
1686 _cpp_buff *result;
1687 unsigned char *base;
1689 if (len < MIN_BUFF_SIZE)
1690 len = MIN_BUFF_SIZE;
1691 len = CPP_ALIGN (len);
1693 base = xmalloc (len + sizeof (_cpp_buff));
1694 result = (_cpp_buff *) (base + len);
1695 result->base = base;
1696 result->cur = base;
1697 result->limit = base + len;
1698 result->next = NULL;
1699 return result;
1702 /* Place a chain of unwanted allocation buffers on the free list. */
1703 void
1704 _cpp_release_buff (pfile, buff)
1705 cpp_reader *pfile;
1706 _cpp_buff *buff;
1708 _cpp_buff *end = buff;
1710 while (end->next)
1711 end = end->next;
1712 end->next = pfile->free_buffs;
1713 pfile->free_buffs = buff;
1716 /* Return a free buffer of size at least MIN_SIZE. */
1717 _cpp_buff *
1718 _cpp_get_buff (pfile, min_size)
1719 cpp_reader *pfile;
1720 size_t min_size;
1722 _cpp_buff *result, **p;
1724 for (p = &pfile->free_buffs;; p = &(*p)->next)
1726 size_t size;
1728 if (*p == NULL)
1729 return new_buff (min_size);
1730 result = *p;
1731 size = result->limit - result->base;
1732 /* Return a buffer that's big enough, but don't waste one that's
1733 way too big. */
1734 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1735 break;
1738 *p = result->next;
1739 result->next = NULL;
1740 result->cur = result->base;
1741 return result;
1744 /* Creates a new buffer with enough space to hold the uncommitted
1745 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1746 the excess bytes to the new buffer. Chains the new buffer after
1747 BUFF, and returns the new buffer. */
1748 _cpp_buff *
1749 _cpp_append_extend_buff (pfile, buff, min_extra)
1750 cpp_reader *pfile;
1751 _cpp_buff *buff;
1752 size_t min_extra;
1754 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1755 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1757 buff->next = new_buff;
1758 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1759 return new_buff;
1762 /* Creates a new buffer with enough space to hold the uncommitted
1763 remaining bytes of the buffer pointed to by BUFF, and at least
1764 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1765 Chains the new buffer before the buffer pointed to by BUFF, and
1766 updates the pointer to point to the new buffer. */
1767 void
1768 _cpp_extend_buff (pfile, pbuff, min_extra)
1769 cpp_reader *pfile;
1770 _cpp_buff **pbuff;
1771 size_t min_extra;
1773 _cpp_buff *new_buff, *old_buff = *pbuff;
1774 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1776 new_buff = _cpp_get_buff (pfile, size);
1777 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1778 new_buff->next = old_buff;
1779 *pbuff = new_buff;
1782 /* Free a chain of buffers starting at BUFF. */
1783 void
1784 _cpp_free_buff (buff)
1785 _cpp_buff *buff;
1787 _cpp_buff *next;
1789 for (; buff; buff = next)
1791 next = buff->next;
1792 free (buff->base);
1796 /* Allocate permanent, unaligned storage of length LEN. */
1797 unsigned char *
1798 _cpp_unaligned_alloc (pfile, len)
1799 cpp_reader *pfile;
1800 size_t len;
1802 _cpp_buff *buff = pfile->u_buff;
1803 unsigned char *result = buff->cur;
1805 if (len > (size_t) (buff->limit - result))
1807 buff = _cpp_get_buff (pfile, len);
1808 buff->next = pfile->u_buff;
1809 pfile->u_buff = buff;
1810 result = buff->cur;
1813 buff->cur = result + len;
1814 return result;
1817 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1818 That buffer is used for growing allocations when saving macro
1819 replacement lists in a #define, and when parsing an answer to an
1820 assertion in #assert, #unassert or #if (and therefore possibly
1821 whilst expanding macros). It therefore must not be used by any
1822 code that they might call: specifically the lexer and the guts of
1823 the macro expander.
1825 All existing other uses clearly fit this restriction: storing
1826 registered pragmas during initialization. */
1827 unsigned char *
1828 _cpp_aligned_alloc (pfile, len)
1829 cpp_reader *pfile;
1830 size_t len;
1832 _cpp_buff *buff = pfile->a_buff;
1833 unsigned char *result = buff->cur;
1835 if (len > (size_t) (buff->limit - result))
1837 buff = _cpp_get_buff (pfile, len);
1838 buff->next = pfile->a_buff;
1839 pfile->a_buff = buff;
1840 result = buff->cur;
1843 buff->cur = result + len;
1844 return result;