PR target/11183
[official-gcc.git] / gcc / cpplex.c
blob72a5e4d612570d9defaca67237d041bedd75381c
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
/* How a token of a given type is spelled; stored in the CATEGORY
   field of struct token_spelling.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Fixed spelling taken from the spelling table.  */
  SPELL_IDENT,          /* Spelling is the name of the token's hash node.  */
  SPELL_LITERAL,        /* Spelling is kept in the token's val.str.  */
  SPELL_NONE            /* The token has no spelling.  */
};
37 struct token_spelling
39 enum spell_type category;
40 const unsigned char *name;
/* Digraph spellings, indexed by (token type - CPP_FIRST_DIGRAPH); see
   the uses in cpp_spell_token and cpp_output_token.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U STRINGX (e) },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
49 #undef OP
50 #undef TK
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
56 static int skip_line_comment PARAMS ((cpp_reader *));
57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
63 cppchar_t));
64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
65 unsigned int, enum cpp_ttype));
66 static bool warn_in_comment PARAMS ((cpp_reader *, _cpp_line_note *));
67 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
68 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
69 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
71 static unsigned int hex_digit_value PARAMS ((unsigned int));
72 static _cpp_buff *new_buff PARAMS ((size_t));
75 /* Utility routine:
77 Compares, the token TOKEN to the NUL-terminated string STRING.
78 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
79 int
80 cpp_ideq (token, string)
81 const cpp_token *token;
82 const char *string;
84 if (token->type != CPP_NAME)
85 return 0;
87 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
90 /* Record a note TYPE at byte POS into the current cleaned logical
91 line. */
92 static void
93 add_line_note (buffer, pos, type)
94 cpp_buffer *buffer;
95 const uchar *pos;
96 unsigned int type;
98 if (buffer->notes_used == buffer->notes_cap)
100 buffer->notes_cap = buffer->notes_cap * 2 + 200;
101 buffer->notes = (_cpp_line_note *)
102 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
105 buffer->notes[buffer->notes_used].pos = pos;
106 buffer->notes[buffer->notes_used].type = type;
107 buffer->notes_used++;
110 /* Returns with a logical line that contains no escaped newlines or
111 trigraphs. This is a time-critical inner loop. */
112 void
113 _cpp_clean_line (pfile)
114 cpp_reader *pfile;
116 cpp_buffer *buffer;
117 const uchar *s;
118 uchar c, *d, *p;
120 buffer = pfile->buffer;
121 buffer->cur_note = buffer->notes_used = 0;
122 buffer->cur = buffer->line_base = buffer->next_line;
123 buffer->need_line = false;
124 s = buffer->next_line - 1;
126 if (!buffer->from_stage3)
128 d = (uchar *) s;
130 for (;;)
132 c = *++s;
133 *++d = c;
135 if (c == '\n' || c == '\r')
137 /* Handle DOS line endings. */
138 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
139 s++;
140 if (s == buffer->rlimit)
141 break;
143 /* Escaped? */
144 p = d;
145 while (p != buffer->next_line && is_nvspace (p[-1]))
146 p--;
147 if (p == buffer->next_line || p[-1] != '\\')
148 break;
150 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
151 d = p - 2;
152 buffer->next_line = p - 1;
154 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
156 /* Add a note regardless, for the benefit of -Wtrigraphs. */
157 add_line_note (buffer, d, s[2]);
158 if (CPP_OPTION (pfile, trigraphs))
160 *d = _cpp_trigraph_map[s[2]];
161 s += 2;
166 else
169 s++;
170 while (*s != '\n' && *s != '\r');
171 d = (uchar *) s;
173 /* Handle DOS line endings. */
174 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
178 *d = '\n';
179 /* A sentinel note that should never be processed. */
180 add_line_note (buffer, d + 1, '\n');
181 buffer->next_line = s + 1;
184 /* Return true if the trigraph indicated by NOTE should be warned
185 about in a comment. */
186 static bool
187 warn_in_comment (pfile, note)
188 cpp_reader *pfile;
189 _cpp_line_note *note;
191 const uchar *p;
193 /* Within comments we don't warn about trigraphs, unless the
194 trigraph forms an escaped newline, as that may change
195 behavior. */
196 if (note->type != '/')
197 return false;
199 /* If -trigraphs, then this was an escaped newline iff the next note
200 is coincident. */
201 if (CPP_OPTION (pfile, trigraphs))
202 return note[1].pos == note->pos;
204 /* Otherwise, see if this forms an escaped newline. */
205 p = note->pos + 3;
206 while (is_nvspace (*p))
207 p++;
209 /* There might have been escaped newlines between the trigraph and the
210 newline we found. Hence the position test. */
211 return (*p == '\n' && p < note[1].pos);
214 /* Process the notes created by add_line_note as far as the current
215 location. */
216 void
217 _cpp_process_line_notes (pfile, in_comment)
218 cpp_reader *pfile;
219 int in_comment;
221 cpp_buffer *buffer = pfile->buffer;
223 for (;;)
225 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
226 unsigned int col;
228 if (note->pos > buffer->cur)
229 break;
231 buffer->cur_note++;
232 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
234 if (note->type == '\\' || note->type == ' ')
236 if (note->type == ' ' && !in_comment)
237 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
238 "backslash and newline separated by space");
240 if (buffer->next_line > buffer->rlimit)
242 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
243 "backslash-newline at end of file");
244 /* Prevent "no newline at end of file" warning. */
245 buffer->next_line = buffer->rlimit;
248 buffer->line_base = note->pos;
249 pfile->line++;
251 else if (_cpp_trigraph_map[note->type])
253 if (CPP_OPTION (pfile, warn_trigraphs)
254 && (!in_comment || warn_in_comment (pfile, note)))
256 if (CPP_OPTION (pfile, trigraphs))
257 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
258 "trigraph ??%c converted to %c",
259 note->type,
260 (int) _cpp_trigraph_map[note->type]);
261 else
262 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
263 "trigraph ??%c ignored",
264 note->type);
267 else
268 abort ();
272 /* Skip a C-style block comment. We find the end of the comment by
273 seeing if an asterisk is before every '/' we encounter. Returns
274 nonzero if comment terminated by EOF, zero otherwise.
276 Buffer->cur points to the initial asterisk of the comment. */
277 bool
278 _cpp_skip_block_comment (pfile)
279 cpp_reader *pfile;
281 cpp_buffer *buffer = pfile->buffer;
282 cppchar_t c;
284 buffer->cur++;
285 if (*buffer->cur == '/')
286 buffer->cur++;
288 for (;;)
290 c = *buffer->cur++;
292 /* People like decorating comments with '*', so check for '/'
293 instead for efficiency. */
294 if (c == '/')
296 if (buffer->cur[-2] == '*')
297 break;
299 /* Warn about potential nested comments, but not if the '/'
300 comes immediately before the true comment delimiter.
301 Don't bother to get it right across escaped newlines. */
302 if (CPP_OPTION (pfile, warn_comments)
303 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
304 cpp_error_with_line (pfile, DL_WARNING,
305 pfile->line, CPP_BUF_COL (buffer),
306 "\"/*\" within comment");
308 else if (c == '\n')
310 buffer->cur--;
311 _cpp_process_line_notes (pfile, true);
312 if (buffer->next_line >= buffer->rlimit)
313 return true;
314 _cpp_clean_line (pfile);
315 pfile->line++;
319 _cpp_process_line_notes (pfile, true);
320 return false;
323 /* Skip a C++ line comment, leaving buffer->cur pointing to the
324 terminating newline. Handles escaped newlines. Returns nonzero
325 if a multiline comment. */
326 static int
327 skip_line_comment (pfile)
328 cpp_reader *pfile;
330 cpp_buffer *buffer = pfile->buffer;
331 unsigned int orig_line = pfile->line;
333 while (*buffer->cur != '\n')
334 buffer->cur++;
336 _cpp_process_line_notes (pfile, true);
337 return orig_line != pfile->line;
340 /* Skips whitespace, saving the next non-whitespace character. */
341 static void
342 skip_whitespace (pfile, c)
343 cpp_reader *pfile;
344 cppchar_t c;
346 cpp_buffer *buffer = pfile->buffer;
347 bool saw_NUL = false;
351 /* Horizontal space always OK. */
352 if (c == ' ' || c == '\t')
354 /* Just \f \v or \0 left. */
355 else if (c == '\0')
356 saw_NUL = true;
357 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
358 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
359 CPP_BUF_COL (buffer),
360 "%s in preprocessing directive",
361 c == '\f' ? "form feed" : "vertical tab");
363 c = *buffer->cur++;
365 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
366 while (is_nvspace (c));
368 if (saw_NUL)
369 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
371 buffer->cur--;
374 /* See if the characters of a number token are valid in a name (no
375 '.', '+' or '-'). */
376 static int
377 name_p (pfile, string)
378 cpp_reader *pfile;
379 const cpp_string *string;
381 unsigned int i;
383 for (i = 0; i < string->len; i++)
384 if (!is_idchar (string->text[i]))
385 return 0;
387 return 1;
390 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
391 an identifier. FIRST is TRUE if this starts an identifier. */
392 static bool
393 forms_identifier_p (pfile, first)
394 cpp_reader *pfile;
395 int first;
397 cpp_buffer *buffer = pfile->buffer;
399 if (*buffer->cur == '$')
401 if (!CPP_OPTION (pfile, dollars_in_ident))
402 return false;
404 buffer->cur++;
405 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
407 CPP_OPTION (pfile, warn_dollars) = 0;
408 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
411 return true;
414 /* Is this a syntactically valid UCN? */
415 if (0 && *buffer->cur == '\\'
416 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
418 buffer->cur += 2;
419 if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
420 return true;
421 buffer->cur -= 2;
424 return false;
427 /* Lex an identifier starting at BUFFER->CUR - 1. */
428 static cpp_hashnode *
429 lex_identifier (pfile, base)
430 cpp_reader *pfile;
431 const uchar *base;
433 cpp_hashnode *result;
434 const uchar *cur;
438 cur = pfile->buffer->cur;
440 /* N.B. ISIDNUM does not include $. */
441 while (ISIDNUM (*cur))
442 cur++;
444 pfile->buffer->cur = cur;
446 while (forms_identifier_p (pfile, false));
448 result = (cpp_hashnode *)
449 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
451 /* Rarely, identifiers require diagnostics when lexed. */
452 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
453 && !pfile->state.skipping, 0))
455 /* It is allowed to poison the same identifier twice. */
456 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
457 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
458 NODE_NAME (result));
460 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
461 replacement list of a variadic macro. */
462 if (result == pfile->spec_nodes.n__VA_ARGS__
463 && !pfile->state.va_args_ok)
464 cpp_error (pfile, DL_PEDWARN,
465 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
468 return result;
471 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
472 static void
473 lex_number (pfile, number)
474 cpp_reader *pfile;
475 cpp_string *number;
477 const uchar *cur;
478 const uchar *base;
479 uchar *dest;
481 base = pfile->buffer->cur - 1;
484 cur = pfile->buffer->cur;
486 /* N.B. ISIDNUM does not include $. */
487 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
488 cur++;
490 pfile->buffer->cur = cur;
492 while (forms_identifier_p (pfile, false));
494 number->len = cur - base;
495 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
496 memcpy (dest, base, number->len);
497 dest[number->len] = '\0';
498 number->text = dest;
501 /* Create a token of type TYPE with a literal spelling. */
502 static void
503 create_literal (pfile, token, base, len, type)
504 cpp_reader *pfile;
505 cpp_token *token;
506 const uchar *base;
507 unsigned int len;
508 enum cpp_ttype type;
510 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
512 memcpy (dest, base, len);
513 dest[len] = '\0';
514 token->type = type;
515 token->val.str.len = len;
516 token->val.str.text = dest;
519 /* Lexes a string, character constant, or angle-bracketed header file
520 name. The stored string contains the spelling, including opening
521 quote and leading any leading 'L'. It returns the type of the
522 literal, or CPP_OTHER if it was not properly terminated.
524 The spelling is NUL-terminated, but it is not guaranteed that this
525 is the first NUL since embedded NULs are preserved. */
526 static void
527 lex_string (pfile, token, base)
528 cpp_reader *pfile;
529 cpp_token *token;
530 const uchar *base;
532 bool saw_NUL = false;
533 const uchar *cur;
534 cppchar_t terminator;
535 enum cpp_ttype type;
537 cur = base;
538 terminator = *cur++;
539 if (terminator == 'L')
540 terminator = *cur++;
541 if (terminator == '\"')
542 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
543 else if (terminator == '\'')
544 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
545 else
546 terminator = '>', type = CPP_HEADER_NAME;
548 for (;;)
550 cppchar_t c = *cur++;
552 /* In #include-style directives, terminators are not escapable. */
553 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
554 cur++;
555 else if (c == terminator)
556 break;
557 else if (c == '\n')
559 cur--;
560 type = CPP_OTHER;
561 break;
563 else if (c == '\0')
564 saw_NUL = true;
567 if (saw_NUL && !pfile->state.skipping)
568 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
570 pfile->buffer->cur = cur;
571 create_literal (pfile, token, base, cur - base, type);
574 /* The stored comment includes the comment start and any terminator. */
575 static void
576 save_comment (pfile, token, from, type)
577 cpp_reader *pfile;
578 cpp_token *token;
579 const unsigned char *from;
580 cppchar_t type;
582 unsigned char *buffer;
583 unsigned int len, clen;
585 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
587 /* C++ comments probably (not definitely) have moved past a new
588 line, which we don't want to save in the comment. */
589 if (is_vspace (pfile->buffer->cur[-1]))
590 len--;
592 /* If we are currently in a directive, then we need to store all
593 C++ comments as C comments internally, and so we need to
594 allocate a little extra space in that case.
596 Note that the only time we encounter a directive here is
597 when we are saving comments in a "#define". */
598 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
600 buffer = _cpp_unaligned_alloc (pfile, clen);
602 token->type = CPP_COMMENT;
603 token->val.str.len = clen;
604 token->val.str.text = buffer;
606 buffer[0] = '/';
607 memcpy (buffer + 1, from, len - 1);
609 /* Finish conversion to a C comment, if necessary. */
610 if (pfile->state.in_directive && type == '/')
612 buffer[1] = '*';
613 buffer[clen - 2] = '*';
614 buffer[clen - 1] = '/';
618 /* Allocate COUNT tokens for RUN. */
619 void
620 _cpp_init_tokenrun (run, count)
621 tokenrun *run;
622 unsigned int count;
624 run->base = xnewvec (cpp_token, count);
625 run->limit = run->base + count;
626 run->next = NULL;
629 /* Returns the next tokenrun, or creates one if there is none. */
630 static tokenrun *
631 next_tokenrun (run)
632 tokenrun *run;
634 if (run->next == NULL)
636 run->next = xnew (tokenrun);
637 run->next->prev = run;
638 _cpp_init_tokenrun (run->next, 250);
641 return run->next;
644 /* Allocate a single token that is invalidated at the same time as the
645 rest of the tokens on the line. Has its line and col set to the
646 same as the last lexed token, so that diagnostics appear in the
647 right place. */
648 cpp_token *
649 _cpp_temp_token (pfile)
650 cpp_reader *pfile;
652 cpp_token *old, *result;
654 old = pfile->cur_token - 1;
655 if (pfile->cur_token == pfile->cur_run->limit)
657 pfile->cur_run = next_tokenrun (pfile->cur_run);
658 pfile->cur_token = pfile->cur_run->base;
661 result = pfile->cur_token++;
662 result->line = old->line;
663 result->col = old->col;
664 return result;
667 /* Lex a token into RESULT (external interface). Takes care of issues
668 like directive handling, token lookahead, multiple include
669 optimization and skipping. */
670 const cpp_token *
671 _cpp_lex_token (pfile)
672 cpp_reader *pfile;
674 cpp_token *result;
676 for (;;)
678 if (pfile->cur_token == pfile->cur_run->limit)
680 pfile->cur_run = next_tokenrun (pfile->cur_run);
681 pfile->cur_token = pfile->cur_run->base;
684 if (pfile->lookaheads)
686 pfile->lookaheads--;
687 result = pfile->cur_token++;
689 else
690 result = _cpp_lex_direct (pfile);
692 if (result->flags & BOL)
694 /* Is this a directive. If _cpp_handle_directive returns
695 false, it is an assembler #. */
696 if (result->type == CPP_HASH
697 /* 6.10.3 p 11: Directives in a list of macro arguments
698 gives undefined behavior. This implementation
699 handles the directive as normal. */
700 && pfile->state.parsing_args != 1
701 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
702 continue;
703 if (pfile->cb.line_change && !pfile->state.skipping)
704 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
707 /* We don't skip tokens in directives. */
708 if (pfile->state.in_directive)
709 break;
711 /* Outside a directive, invalidate controlling macros. At file
712 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
713 get here and MI optimization works. */
714 pfile->mi_valid = false;
716 if (!pfile->state.skipping || result->type == CPP_EOF)
717 break;
720 return result;
723 /* Returns true if a fresh line has been loaded. */
724 bool
725 _cpp_get_fresh_line (pfile)
726 cpp_reader *pfile;
728 /* We can't get a new line until we leave the current directive. */
729 if (pfile->state.in_directive)
730 return false;
732 for (;;)
734 cpp_buffer *buffer = pfile->buffer;
736 if (!buffer->need_line)
737 return true;
739 if (buffer->next_line < buffer->rlimit)
741 _cpp_clean_line (pfile);
742 return true;
745 /* First, get out of parsing arguments state. */
746 if (pfile->state.parsing_args)
747 return false;
749 /* End of buffer. Non-empty files should end in a newline. */
750 if (buffer->buf != buffer->rlimit
751 && buffer->next_line > buffer->rlimit
752 && !buffer->from_stage3)
754 /* Only warn once. */
755 buffer->next_line = buffer->rlimit;
756 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
757 CPP_BUF_COLUMN (buffer, buffer->cur),
758 "no newline at end of file");
761 if (!buffer->prev)
762 return false;
764 if (buffer->return_at_eof)
766 _cpp_pop_buffer (pfile);
767 return false;
770 _cpp_pop_buffer (pfile);
/* Set result->type to THEN_TYPE and consume the next character if it
   is CHAR; otherwise set result->type to ELSE_TYPE.  Relies on local
   variables named `result' and `buffer' at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = ELSE_TYPE;                 \
      if (*buffer->cur == CHAR)                 \
        buffer->cur++, result->type = THEN_TYPE; \
    }                                           \
  while (0)
783 /* Lex a token into pfile->cur_token, which is also incremented, to
784 get diagnostics pointing to the correct location.
786 Does not handle issues such as token lookahead, multiple-include
787 optimisation, directives, skipping etc. This function is only
788 suitable for use by _cpp_lex_token, and in special cases like
789 lex_expansion_token which doesn't care for any of these issues.
791 When meeting a newline, returns CPP_EOF if parsing a directive,
792 otherwise returns to the start of the token buffer if permissible.
793 Returns the location of the lexed token. */
794 cpp_token *
795 _cpp_lex_direct (pfile)
796 cpp_reader *pfile;
798 cppchar_t c;
799 cpp_buffer *buffer;
800 const unsigned char *comment_start;
801 cpp_token *result = pfile->cur_token++;
803 fresh_line:
804 result->flags = 0;
805 if (pfile->buffer->need_line)
807 if (!_cpp_get_fresh_line (pfile))
809 result->type = CPP_EOF;
810 if (!pfile->state.in_directive)
812 /* Tell the compiler the line number of the EOF token. */
813 result->line = pfile->line;
814 result->flags = BOL;
816 return result;
818 if (!pfile->keep_tokens)
820 pfile->cur_run = &pfile->base_run;
821 result = pfile->base_run.base;
822 pfile->cur_token = result + 1;
824 result->flags = BOL;
825 if (pfile->state.parsing_args == 2)
826 result->flags |= PREV_WHITE;
828 buffer = pfile->buffer;
829 update_tokens_line:
830 result->line = pfile->line;
832 skipped_white:
833 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
834 && !pfile->overlaid_buffer)
836 _cpp_process_line_notes (pfile, false);
837 result->line = pfile->line;
839 c = *buffer->cur++;
840 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
842 switch (c)
844 case ' ': case '\t': case '\f': case '\v': case '\0':
845 result->flags |= PREV_WHITE;
846 skip_whitespace (pfile, c);
847 goto skipped_white;
849 case '\n':
850 pfile->line++;
851 buffer->need_line = true;
852 goto fresh_line;
854 case '0': case '1': case '2': case '3': case '4':
855 case '5': case '6': case '7': case '8': case '9':
856 result->type = CPP_NUMBER;
857 lex_number (pfile, &result->val.str);
858 break;
860 case 'L':
861 /* 'L' may introduce wide characters or strings. */
862 if (*buffer->cur == '\'' || *buffer->cur == '"')
864 lex_string (pfile, result, buffer->cur - 1);
865 break;
867 /* Fall through. */
869 case '_':
870 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
871 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
872 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
873 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
874 case 'y': case 'z':
875 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
876 case 'G': case 'H': case 'I': case 'J': case 'K':
877 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
878 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
879 case 'Y': case 'Z':
880 result->type = CPP_NAME;
881 result->val.node = lex_identifier (pfile, buffer->cur - 1);
883 /* Convert named operators to their proper types. */
884 if (result->val.node->flags & NODE_OPERATOR)
886 result->flags |= NAMED_OP;
887 result->type = result->val.node->directive_index;
889 break;
891 case '\'':
892 case '"':
893 lex_string (pfile, result, buffer->cur - 1);
894 break;
896 case '/':
897 /* A potential block or line comment. */
898 comment_start = buffer->cur;
899 c = *buffer->cur;
901 if (c == '*')
903 if (_cpp_skip_block_comment (pfile))
904 cpp_error (pfile, DL_ERROR, "unterminated comment");
906 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
907 || CPP_IN_SYSTEM_HEADER (pfile)))
909 /* Warn about comments only if pedantically GNUC89, and not
910 in system headers. */
911 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
912 && ! buffer->warned_cplusplus_comments)
914 cpp_error (pfile, DL_PEDWARN,
915 "C++ style comments are not allowed in ISO C90");
916 cpp_error (pfile, DL_PEDWARN,
917 "(this will be reported only once per input file)");
918 buffer->warned_cplusplus_comments = 1;
921 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
922 cpp_error (pfile, DL_WARNING, "multi-line comment");
924 else if (c == '=')
926 buffer->cur++;
927 result->type = CPP_DIV_EQ;
928 break;
930 else
932 result->type = CPP_DIV;
933 break;
936 if (!pfile->state.save_comments)
938 result->flags |= PREV_WHITE;
939 goto update_tokens_line;
942 /* Save the comment as a token in its own right. */
943 save_comment (pfile, result, comment_start, c);
944 break;
946 case '<':
947 if (pfile->state.angled_headers)
949 lex_string (pfile, result, buffer->cur - 1);
950 break;
953 result->type = CPP_LESS;
954 if (*buffer->cur == '=')
955 buffer->cur++, result->type = CPP_LESS_EQ;
956 else if (*buffer->cur == '<')
958 buffer->cur++;
959 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
961 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
963 buffer->cur++;
964 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
966 else if (CPP_OPTION (pfile, digraphs))
968 if (*buffer->cur == ':')
970 buffer->cur++;
971 result->flags |= DIGRAPH;
972 result->type = CPP_OPEN_SQUARE;
974 else if (*buffer->cur == '%')
976 buffer->cur++;
977 result->flags |= DIGRAPH;
978 result->type = CPP_OPEN_BRACE;
981 break;
983 case '>':
984 result->type = CPP_GREATER;
985 if (*buffer->cur == '=')
986 buffer->cur++, result->type = CPP_GREATER_EQ;
987 else if (*buffer->cur == '>')
989 buffer->cur++;
990 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
992 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
994 buffer->cur++;
995 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
997 break;
999 case '%':
1000 result->type = CPP_MOD;
1001 if (*buffer->cur == '=')
1002 buffer->cur++, result->type = CPP_MOD_EQ;
1003 else if (CPP_OPTION (pfile, digraphs))
1005 if (*buffer->cur == ':')
1007 buffer->cur++;
1008 result->flags |= DIGRAPH;
1009 result->type = CPP_HASH;
1010 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1011 buffer->cur += 2, result->type = CPP_PASTE;
1013 else if (*buffer->cur == '>')
1015 buffer->cur++;
1016 result->flags |= DIGRAPH;
1017 result->type = CPP_CLOSE_BRACE;
1020 break;
1022 case '.':
1023 result->type = CPP_DOT;
1024 if (ISDIGIT (*buffer->cur))
1026 result->type = CPP_NUMBER;
1027 lex_number (pfile, &result->val.str);
1029 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1030 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1031 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1032 buffer->cur++, result->type = CPP_DOT_STAR;
1033 break;
1035 case '+':
1036 result->type = CPP_PLUS;
1037 if (*buffer->cur == '+')
1038 buffer->cur++, result->type = CPP_PLUS_PLUS;
1039 else if (*buffer->cur == '=')
1040 buffer->cur++, result->type = CPP_PLUS_EQ;
1041 break;
1043 case '-':
1044 result->type = CPP_MINUS;
1045 if (*buffer->cur == '>')
1047 buffer->cur++;
1048 result->type = CPP_DEREF;
1049 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1050 buffer->cur++, result->type = CPP_DEREF_STAR;
1052 else if (*buffer->cur == '-')
1053 buffer->cur++, result->type = CPP_MINUS_MINUS;
1054 else if (*buffer->cur == '=')
1055 buffer->cur++, result->type = CPP_MINUS_EQ;
1056 break;
1058 case '&':
1059 result->type = CPP_AND;
1060 if (*buffer->cur == '&')
1061 buffer->cur++, result->type = CPP_AND_AND;
1062 else if (*buffer->cur == '=')
1063 buffer->cur++, result->type = CPP_AND_EQ;
1064 break;
1066 case '|':
1067 result->type = CPP_OR;
1068 if (*buffer->cur == '|')
1069 buffer->cur++, result->type = CPP_OR_OR;
1070 else if (*buffer->cur == '=')
1071 buffer->cur++, result->type = CPP_OR_EQ;
1072 break;
1074 case ':':
1075 result->type = CPP_COLON;
1076 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1077 buffer->cur++, result->type = CPP_SCOPE;
1078 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1080 buffer->cur++;
1081 result->flags |= DIGRAPH;
1082 result->type = CPP_CLOSE_SQUARE;
1084 break;
1086 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1087 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1088 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1089 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1090 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1092 case '?': result->type = CPP_QUERY; break;
1093 case '~': result->type = CPP_COMPL; break;
1094 case ',': result->type = CPP_COMMA; break;
1095 case '(': result->type = CPP_OPEN_PAREN; break;
1096 case ')': result->type = CPP_CLOSE_PAREN; break;
1097 case '[': result->type = CPP_OPEN_SQUARE; break;
1098 case ']': result->type = CPP_CLOSE_SQUARE; break;
1099 case '{': result->type = CPP_OPEN_BRACE; break;
1100 case '}': result->type = CPP_CLOSE_BRACE; break;
1101 case ';': result->type = CPP_SEMICOLON; break;
1103 /* @ is a punctuator in Objective-C. */
1104 case '@': result->type = CPP_ATSIGN; break;
1106 case '$':
1107 case '\\':
1109 const uchar *base = --buffer->cur;
1111 if (forms_identifier_p (pfile, true))
1113 result->type = CPP_NAME;
1114 result->val.node = lex_identifier (pfile, base);
1115 break;
1117 buffer->cur++;
1120 default:
1121 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1122 break;
1125 return result;
1128 /* An upper bound on the number of bytes needed to spell TOKEN.
1129 Does not include preceding whitespace. */
1130 unsigned int
1131 cpp_token_len (token)
1132 const cpp_token *token;
1134 unsigned int len;
1136 switch (TOKEN_SPELL (token))
1138 default: len = 4; break;
1139 case SPELL_LITERAL: len = token->val.str.len; break;
1140 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1143 return len;
1146 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1147 already contain the enough space to hold the token's spelling.
1148 Returns a pointer to the character after the last character
1149 written. */
1150 unsigned char *
1151 cpp_spell_token (pfile, token, buffer)
1152 cpp_reader *pfile; /* Would be nice to be rid of this... */
1153 const cpp_token *token;
1154 unsigned char *buffer;
1156 switch (TOKEN_SPELL (token))
1158 case SPELL_OPERATOR:
1160 const unsigned char *spelling;
1161 unsigned char c;
1163 if (token->flags & DIGRAPH)
1164 spelling
1165 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1166 else if (token->flags & NAMED_OP)
1167 goto spell_ident;
1168 else
1169 spelling = TOKEN_NAME (token);
1171 while ((c = *spelling++) != '\0')
1172 *buffer++ = c;
1174 break;
1176 spell_ident:
1177 case SPELL_IDENT:
1178 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1179 buffer += NODE_LEN (token->val.node);
1180 break;
1182 case SPELL_LITERAL:
1183 memcpy (buffer, token->val.str.text, token->val.str.len);
1184 buffer += token->val.str.len;
1185 break;
1187 case SPELL_NONE:
1188 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1189 break;
1192 return buffer;
1195 /* Returns TOKEN spelt as a null-terminated string. The string is
1196 freed when the reader is destroyed. Useful for diagnostics. */
1197 unsigned char *
1198 cpp_token_as_text (pfile, token)
1199 cpp_reader *pfile;
1200 const cpp_token *token;
1202 unsigned int len = cpp_token_len (token) + 1;
1203 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1205 end = cpp_spell_token (pfile, token, start);
1206 end[0] = '\0';
1208 return start;
1211 /* Used by C front ends, which really should move to using
1212 cpp_token_as_text. */
1213 const char *
1214 cpp_type2name (type)
1215 enum cpp_ttype type;
1217 return (const char *) token_spellings[type].name;
1220 /* Writes the spelling of token to FP, without any preceding space.
1221 Separated from cpp_spell_token for efficiency - to avoid stdio
1222 double-buffering. */
1223 void
1224 cpp_output_token (token, fp)
1225 const cpp_token *token;
1226 FILE *fp;
1228 switch (TOKEN_SPELL (token))
1230 case SPELL_OPERATOR:
1232 const unsigned char *spelling;
1233 int c;
1235 if (token->flags & DIGRAPH)
1236 spelling
1237 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1238 else if (token->flags & NAMED_OP)
1239 goto spell_ident;
1240 else
1241 spelling = TOKEN_NAME (token);
1243 c = *spelling;
1245 putc (c, fp);
1246 while ((c = *++spelling) != '\0');
1248 break;
1250 spell_ident:
1251 case SPELL_IDENT:
1252 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1253 break;
1255 case SPELL_LITERAL:
1256 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1257 break;
1259 case SPELL_NONE:
1260 /* An error, most probably. */
1261 break;
1265 /* Compare two tokens. */
1267 _cpp_equiv_tokens (a, b)
1268 const cpp_token *a, *b;
1270 if (a->type == b->type && a->flags == b->flags)
1271 switch (TOKEN_SPELL (a))
1273 default: /* Keep compiler happy. */
1274 case SPELL_OPERATOR:
1275 return 1;
1276 case SPELL_NONE:
1277 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1278 case SPELL_IDENT:
1279 return a->val.node == b->val.node;
1280 case SPELL_LITERAL:
1281 return (a->val.str.len == b->val.str.len
1282 && !memcmp (a->val.str.text, b->val.str.text,
1283 a->val.str.len));
1286 return 0;
1289 /* Returns nonzero if a space should be inserted to avoid an
1290 accidental token paste for output. For simplicity, it is
1291 conservative, and occasionally advises a space where one is not
1292 needed, e.g. "." and ".2". */
1294 cpp_avoid_paste (pfile, token1, token2)
1295 cpp_reader *pfile;
1296 const cpp_token *token1, *token2;
1298 enum cpp_ttype a = token1->type, b = token2->type;
1299 cppchar_t c;
1301 if (token1->flags & NAMED_OP)
1302 a = CPP_NAME;
1303 if (token2->flags & NAMED_OP)
1304 b = CPP_NAME;
1306 c = EOF;
1307 if (token2->flags & DIGRAPH)
1308 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1309 else if (token_spellings[b].category == SPELL_OPERATOR)
1310 c = token_spellings[b].name[0];
1312 /* Quickly get everything that can paste with an '='. */
1313 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1314 return 1;
1316 switch (a)
1318 case CPP_GREATER: return c == '>' || c == '?';
1319 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1320 case CPP_PLUS: return c == '+';
1321 case CPP_MINUS: return c == '-' || c == '>';
1322 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1323 case CPP_MOD: return c == ':' || c == '>';
1324 case CPP_AND: return c == '&';
1325 case CPP_OR: return c == '|';
1326 case CPP_COLON: return c == ':' || c == '>';
1327 case CPP_DEREF: return c == '*';
1328 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1329 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1330 case CPP_NAME: return ((b == CPP_NUMBER
1331 && name_p (pfile, &token2->val.str))
1332 || b == CPP_NAME
1333 || b == CPP_CHAR || b == CPP_STRING); /* L */
1334 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1335 || c == '.' || c == '+' || c == '-');
1336 /* UCNs */
1337 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1338 && b == CPP_NAME)
1339 || (CPP_OPTION (pfile, objc)
1340 && token1->val.str.text[0] == '@'
1341 && (b == CPP_NAME || b == CPP_STRING)));
1342 default: break;
1345 return 0;
1348 /* Output all the remaining tokens on the current line, and a newline
1349 character, to FP. Leading whitespace is removed. If there are
1350 macros, special token padding is not performed. */
1351 void
1352 cpp_output_line (pfile, fp)
1353 cpp_reader *pfile;
1354 FILE *fp;
1356 const cpp_token *token;
1358 token = cpp_get_token (pfile);
1359 while (token->type != CPP_EOF)
1361 cpp_output_token (token, fp);
1362 token = cpp_get_token (pfile);
1363 if (token->flags & PREV_WHITE)
1364 putc (' ', fp);
1367 putc ('\n', fp);
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (hex_p (c))
    return hex_value (c);
  else
    abort ();
}
1381 /* Read a possible universal character name starting at *PSTR. */
1382 static cppchar_t
1383 maybe_read_ucn (pfile, pstr)
1384 cpp_reader *pfile;
1385 const uchar **pstr;
1387 cppchar_t result, c = (*pstr)[-1];
1389 result = _cpp_valid_ucn (pfile, pstr, false);
1390 if (result)
1392 if (CPP_WTRADITIONAL (pfile))
1393 cpp_error (pfile, DL_WARNING,
1394 "the meaning of '\\%c' is different in traditional C",
1395 (int) c);
1397 if (CPP_OPTION (pfile, EBCDIC))
1399 cpp_error (pfile, DL_ERROR,
1400 "universal character with an EBCDIC target");
1401 result = 0x3f; /* EBCDIC invalid character */
1405 return result;
1408 /* Returns the value of an escape sequence, truncated to the correct
1409 target precision. PSTR points to the input pointer, which is just
1410 after the backslash. LIMIT is how much text we have. WIDE is true
1411 if the escape sequence is part of a wide character constant or
1412 string literal. Handles all relevant diagnostics. */
1413 cppchar_t
1414 cpp_parse_escape (pfile, pstr, limit, wide)
1415 cpp_reader *pfile;
1416 const unsigned char **pstr;
1417 const unsigned char *limit;
1418 int wide;
1420 /* Values of \a \b \e \f \n \r \t \v respectively. */
1421 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1422 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1424 int unknown = 0;
1425 const unsigned char *str = *pstr, *charconsts;
1426 cppchar_t c, ucn, mask;
1427 unsigned int width;
1429 if (CPP_OPTION (pfile, EBCDIC))
1430 charconsts = ebcdic;
1431 else
1432 charconsts = ascii;
1434 if (wide)
1435 width = CPP_OPTION (pfile, wchar_precision);
1436 else
1437 width = CPP_OPTION (pfile, char_precision);
1438 if (width < BITS_PER_CPPCHAR_T)
1439 mask = ((cppchar_t) 1 << width) - 1;
1440 else
1441 mask = ~0;
1443 c = *str++;
1444 switch (c)
1446 case '\\': case '\'': case '"': case '?': break;
1447 case 'b': c = charconsts[1]; break;
1448 case 'f': c = charconsts[3]; break;
1449 case 'n': c = charconsts[4]; break;
1450 case 'r': c = charconsts[5]; break;
1451 case 't': c = charconsts[6]; break;
1452 case 'v': c = charconsts[7]; break;
1454 case '(': case '{': case '[': case '%':
1455 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1456 '\%' is used to prevent SCCS from getting confused. */
1457 unknown = CPP_PEDANTIC (pfile);
1458 break;
1460 case 'a':
1461 if (CPP_WTRADITIONAL (pfile))
1462 cpp_error (pfile, DL_WARNING,
1463 "the meaning of '\\a' is different in traditional C");
1464 c = charconsts[0];
1465 break;
1467 case 'e': case 'E':
1468 if (CPP_PEDANTIC (pfile))
1469 cpp_error (pfile, DL_PEDWARN,
1470 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1471 c = charconsts[2];
1472 break;
1474 case 'u': case 'U':
1475 ucn = maybe_read_ucn (pfile, &str);
1476 if (ucn)
1477 c = ucn;
1478 else
1479 unknown = true;
1480 break;
1482 case 'x':
1483 if (CPP_WTRADITIONAL (pfile))
1484 cpp_error (pfile, DL_WARNING,
1485 "the meaning of '\\x' is different in traditional C");
1488 cppchar_t i = 0, overflow = 0;
1489 int digits_found = 0;
1491 while (str < limit)
1493 c = *str;
1494 if (! ISXDIGIT (c))
1495 break;
1496 str++;
1497 overflow |= i ^ (i << 4 >> 4);
1498 i = (i << 4) + hex_digit_value (c);
1499 digits_found = 1;
1502 if (!digits_found)
1503 cpp_error (pfile, DL_ERROR,
1504 "\\x used with no following hex digits");
1506 if (overflow | (i != (i & mask)))
1508 cpp_error (pfile, DL_PEDWARN,
1509 "hex escape sequence out of range");
1510 i &= mask;
1512 c = i;
1514 break;
1516 case '0': case '1': case '2': case '3':
1517 case '4': case '5': case '6': case '7':
1519 size_t count = 0;
1520 cppchar_t i = c - '0';
1522 while (str < limit && ++count < 3)
1524 c = *str;
1525 if (c < '0' || c > '7')
1526 break;
1527 str++;
1528 i = (i << 3) + c - '0';
1531 if (i != (i & mask))
1533 cpp_error (pfile, DL_PEDWARN,
1534 "octal escape sequence out of range");
1535 i &= mask;
1537 c = i;
1539 break;
1541 default:
1542 unknown = 1;
1543 break;
1546 if (unknown)
1548 if (ISGRAPH (c))
1549 cpp_error (pfile, DL_PEDWARN,
1550 "unknown escape sequence '\\%c'", (int) c);
1551 else
1552 cpp_error (pfile, DL_PEDWARN,
1553 "unknown escape sequence: '\\%03o'", (int) c);
1556 if (c > mask)
1558 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1559 c &= mask;
1562 *pstr = str;
1563 return c;
1566 /* Interpret a (possibly wide) character constant in TOKEN.
1567 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1568 points to a variable that is filled in with the number of
1569 characters seen, and UNSIGNEDP to a variable that indicates whether
1570 the result has signed type. */
1571 cppchar_t
1572 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1573 cpp_reader *pfile;
1574 const cpp_token *token;
1575 unsigned int *pchars_seen;
1576 int *unsignedp;
1578 const unsigned char *str, *limit;
1579 unsigned int chars_seen = 0;
1580 size_t width, max_chars;
1581 cppchar_t c, mask, result = 0;
1582 bool unsigned_p;
1584 str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1585 limit = token->val.str.text + token->val.str.len - 1;
1587 if (token->type == CPP_CHAR)
1589 width = CPP_OPTION (pfile, char_precision);
1590 max_chars = CPP_OPTION (pfile, int_precision) / width;
1591 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1593 else
1595 width = CPP_OPTION (pfile, wchar_precision);
1596 max_chars = 1;
1597 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1600 if (width < BITS_PER_CPPCHAR_T)
1601 mask = ((cppchar_t) 1 << width) - 1;
1602 else
1603 mask = ~0;
1605 while (str < limit)
1607 c = *str++;
1609 if (c == '\\')
1610 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1612 #ifdef MAP_CHARACTER
1613 if (ISPRINT (c))
1614 c = MAP_CHARACTER (c);
1615 #endif
1617 chars_seen++;
1619 /* Truncate the character, scale the result and merge the two. */
1620 c &= mask;
1621 if (width < BITS_PER_CPPCHAR_T)
1622 result = (result << width) | c;
1623 else
1624 result = c;
1627 if (chars_seen == 0)
1628 cpp_error (pfile, DL_ERROR, "empty character constant");
1629 else if (chars_seen > 1)
1631 /* Multichar charconsts are of type int and therefore signed. */
1632 unsigned_p = 0;
1634 if (chars_seen > max_chars)
1636 chars_seen = max_chars;
1637 cpp_error (pfile, DL_WARNING,
1638 "character constant too long for its type");
1640 else if (CPP_OPTION (pfile, warn_multichar))
1641 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1644 /* Sign-extend or truncate the constant to cppchar_t. The value is
1645 in WIDTH bits, but for multi-char charconsts it's value is the
1646 full target type's width. */
1647 if (chars_seen > 1)
1648 width *= max_chars;
1649 if (width < BITS_PER_CPPCHAR_T)
1651 mask = ((cppchar_t) 1 << width) - 1;
1652 if (unsigned_p || !(result & (1 << (width - 1))))
1653 result &= mask;
1654 else
1655 result |= ~mask;
1658 *pchars_seen = chars_seen;
1659 *unsignedp = unsigned_p;
1660 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF by at least MIN_EXTRA bytes.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1678 /* Create a new allocation buffer. Place the control block at the end
1679 of the buffer, so that buffer overflows will cause immediate chaos. */
1680 static _cpp_buff *
1681 new_buff (len)
1682 size_t len;
1684 _cpp_buff *result;
1685 unsigned char *base;
1687 if (len < MIN_BUFF_SIZE)
1688 len = MIN_BUFF_SIZE;
1689 len = CPP_ALIGN (len);
1691 base = xmalloc (len + sizeof (_cpp_buff));
1692 result = (_cpp_buff *) (base + len);
1693 result->base = base;
1694 result->cur = base;
1695 result->limit = base + len;
1696 result->next = NULL;
1697 return result;
1700 /* Place a chain of unwanted allocation buffers on the free list. */
1701 void
1702 _cpp_release_buff (pfile, buff)
1703 cpp_reader *pfile;
1704 _cpp_buff *buff;
1706 _cpp_buff *end = buff;
1708 while (end->next)
1709 end = end->next;
1710 end->next = pfile->free_buffs;
1711 pfile->free_buffs = buff;
1714 /* Return a free buffer of size at least MIN_SIZE. */
1715 _cpp_buff *
1716 _cpp_get_buff (pfile, min_size)
1717 cpp_reader *pfile;
1718 size_t min_size;
1720 _cpp_buff *result, **p;
1722 for (p = &pfile->free_buffs;; p = &(*p)->next)
1724 size_t size;
1726 if (*p == NULL)
1727 return new_buff (min_size);
1728 result = *p;
1729 size = result->limit - result->base;
1730 /* Return a buffer that's big enough, but don't waste one that's
1731 way too big. */
1732 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1733 break;
1736 *p = result->next;
1737 result->next = NULL;
1738 result->cur = result->base;
1739 return result;
1742 /* Creates a new buffer with enough space to hold the uncommitted
1743 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1744 the excess bytes to the new buffer. Chains the new buffer after
1745 BUFF, and returns the new buffer. */
1746 _cpp_buff *
1747 _cpp_append_extend_buff (pfile, buff, min_extra)
1748 cpp_reader *pfile;
1749 _cpp_buff *buff;
1750 size_t min_extra;
1752 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1753 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1755 buff->next = new_buff;
1756 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1757 return new_buff;
1760 /* Creates a new buffer with enough space to hold the uncommitted
1761 remaining bytes of the buffer pointed to by BUFF, and at least
1762 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1763 Chains the new buffer before the buffer pointed to by BUFF, and
1764 updates the pointer to point to the new buffer. */
1765 void
1766 _cpp_extend_buff (pfile, pbuff, min_extra)
1767 cpp_reader *pfile;
1768 _cpp_buff **pbuff;
1769 size_t min_extra;
1771 _cpp_buff *new_buff, *old_buff = *pbuff;
1772 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1774 new_buff = _cpp_get_buff (pfile, size);
1775 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1776 new_buff->next = old_buff;
1777 *pbuff = new_buff;
1780 /* Free a chain of buffers starting at BUFF. */
1781 void
1782 _cpp_free_buff (buff)
1783 _cpp_buff *buff;
1785 _cpp_buff *next;
1787 for (; buff; buff = next)
1789 next = buff->next;
1790 free (buff->base);
1794 /* Allocate permanent, unaligned storage of length LEN. */
1795 unsigned char *
1796 _cpp_unaligned_alloc (pfile, len)
1797 cpp_reader *pfile;
1798 size_t len;
1800 _cpp_buff *buff = pfile->u_buff;
1801 unsigned char *result = buff->cur;
1803 if (len > (size_t) (buff->limit - result))
1805 buff = _cpp_get_buff (pfile, len);
1806 buff->next = pfile->u_buff;
1807 pfile->u_buff = buff;
1808 result = buff->cur;
1811 buff->cur = result + len;
1812 return result;
1815 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1816 That buffer is used for growing allocations when saving macro
1817 replacement lists in a #define, and when parsing an answer to an
1818 assertion in #assert, #unassert or #if (and therefore possibly
1819 whilst expanding macros). It therefore must not be used by any
1820 code that they might call: specifically the lexer and the guts of
1821 the macro expander.
1823 All existing other uses clearly fit this restriction: storing
1824 registered pragmas during initialization. */
1825 unsigned char *
1826 _cpp_aligned_alloc (pfile, len)
1827 cpp_reader *pfile;
1828 size_t len;
1830 _cpp_buff *buff = pfile->a_buff;
1831 unsigned char *result = buff->cur;
1833 if (len > (size_t) (buff->limit - result))
1835 buff = _cpp_get_buff (pfile, len);
1836 buff->next = pfile->a_buff;
1837 pfile->a_buff = buff;
1838 result = buff->cur;
1841 buff->cur = result + len;
1842 return result;