* config/arm/elf.h (ASM_OUTPUT_ALIGNED_COMMON): Remove definition.
[official-gcc.git] / gcc / cpplex.c
blob4f0767e42c8eaa916340d2433ebb999fdba2018d
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "cpplib.h"
27 #include "cpphash.h"
/* How a token's spelling is obtained.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Fixed spelling taken from a table.  */
  SPELL_IDENT,          /* Spelling is the name of the token's node.  */
  SPELL_LITERAL,        /* Spelling is stored in the token's string.  */
  SPELL_NONE            /* Token has no spelling of its own.  */
};

/* One entry of the per-token-type spelling table.  */
struct token_spelling
{
  enum spell_type category;     /* How to spell tokens of this type.  */
  const unsigned char *name;    /* Table spelling or printable name.  */
};
43 static const unsigned char *const digraph_spellings[] =
44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U STRINGX (e) },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
49 #undef OP
50 #undef TK
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
56 static int skip_line_comment PARAMS ((cpp_reader *));
57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
63 cppchar_t));
64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
65 unsigned int, enum cpp_ttype));
66 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
67 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
68 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
70 static unsigned int hex_digit_value PARAMS ((unsigned int));
71 static _cpp_buff *new_buff PARAMS ((size_t));
74 /* Utility routine:
76 Compares, the token TOKEN to the NUL-terminated string STRING.
77 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
78 int
79 cpp_ideq (token, string)
80 const cpp_token *token;
81 const char *string;
83 if (token->type != CPP_NAME)
84 return 0;
86 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
89 /* Record a note TYPE at byte POS into the current cleaned logical
90 line. */
91 static void
92 add_line_note (buffer, pos, type)
93 cpp_buffer *buffer;
94 const uchar *pos;
95 unsigned int type;
97 if (buffer->notes_used == buffer->notes_cap)
99 buffer->notes_cap = buffer->notes_cap * 2 + 200;
100 buffer->notes = (_cpp_line_note *)
101 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
104 buffer->notes[buffer->notes_used].pos = pos;
105 buffer->notes[buffer->notes_used].type = type;
106 buffer->notes_used++;
109 /* Returns with a logical line that contains no escaped newlines or
110 trigraphs. This is a time-critical inner loop. */
111 void
112 _cpp_clean_line (pfile)
113 cpp_reader *pfile;
115 cpp_buffer *buffer;
116 const uchar *s;
117 uchar c, *d, *p;
119 buffer = pfile->buffer;
120 buffer->cur_note = buffer->notes_used = 0;
121 buffer->cur = buffer->line_base = buffer->next_line;
122 buffer->need_line = false;
123 s = buffer->next_line - 1;
125 if (!buffer->from_stage3)
127 d = (uchar *) s;
129 for (;;)
131 c = *++s;
132 *++d = c;
134 if (c == '\n' || c == '\r')
136 /* Handle DOS line endings. */
137 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
138 s++;
139 if (s == buffer->rlimit)
140 break;
142 /* Escaped? */
143 p = d;
144 while (p != buffer->next_line && is_nvspace (p[-1]))
145 p--;
146 if (p == buffer->next_line || p[-1] != '\\')
147 break;
149 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
150 d = p - 2;
151 buffer->next_line = p - 1;
153 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
155 /* Add a note regardless, for the benefit of -Wtrigraphs. */
156 add_line_note (buffer, d, s[2]);
157 if (CPP_OPTION (pfile, trigraphs))
159 *d = _cpp_trigraph_map[s[2]];
160 s += 2;
165 else
168 s++;
169 while (*s != '\n' && *s != '\r');
170 d = (uchar *) s;
172 /* Handle DOS line endings. */
173 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
174 s++;
177 *d = '\n';
178 /* A sentinel note that should never be processed. */
179 add_line_note (buffer, d + 1, '\n');
180 buffer->next_line = s + 1;
183 /* Process the notes created by add_line_note as far as the current
184 location. */
185 void
186 _cpp_process_line_notes (pfile, in_comment)
187 cpp_reader *pfile;
188 int in_comment;
190 cpp_buffer *buffer = pfile->buffer;
192 for (;;)
194 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
195 unsigned int col;
197 if (note->pos > buffer->cur)
198 break;
200 buffer->cur_note++;
201 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
203 if (note->type == '\\' || note->type == ' ')
205 if (note->type == ' ' && !in_comment)
206 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
207 "backslash and newline separated by space");
209 if (buffer->next_line > buffer->rlimit)
211 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
212 "backslash-newline at end of file");
213 /* Prevent "no newline at end of file" warning. */
214 buffer->next_line = buffer->rlimit;
217 buffer->line_base = note->pos;
218 pfile->line++;
220 else if (_cpp_trigraph_map[note->type])
222 if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
224 if (CPP_OPTION (pfile, trigraphs))
225 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
226 "trigraph ??%c converted to %c",
227 note->type,
228 (int) _cpp_trigraph_map[note->type]);
229 else
230 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
231 "trigraph ??%c ignored",
232 note->type);
235 else
236 abort ();
240 /* Skip a C-style block comment. We find the end of the comment by
241 seeing if an asterisk is before every '/' we encounter. Returns
242 nonzero if comment terminated by EOF, zero otherwise.
244 Buffer->cur points to the initial asterisk of the comment. */
245 bool
246 _cpp_skip_block_comment (pfile)
247 cpp_reader *pfile;
249 cpp_buffer *buffer = pfile->buffer;
250 cppchar_t c;
252 buffer->cur++;
253 if (*buffer->cur == '/')
254 buffer->cur++;
256 for (;;)
258 c = *buffer->cur++;
260 /* People like decorating comments with '*', so check for '/'
261 instead for efficiency. */
262 if (c == '/')
264 if (buffer->cur[-2] == '*')
265 break;
267 /* Warn about potential nested comments, but not if the '/'
268 comes immediately before the true comment delimiter.
269 Don't bother to get it right across escaped newlines. */
270 if (CPP_OPTION (pfile, warn_comments)
271 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
272 cpp_error_with_line (pfile, DL_WARNING,
273 pfile->line, CPP_BUF_COL (buffer),
274 "\"/*\" within comment");
276 else if (c == '\n')
278 buffer->cur--;
279 _cpp_process_line_notes (pfile, true);
280 if (buffer->next_line >= buffer->rlimit)
281 return true;
282 _cpp_clean_line (pfile);
283 pfile->line++;
287 return false;
290 /* Skip a C++ line comment, leaving buffer->cur pointing to the
291 terminating newline. Handles escaped newlines. Returns nonzero
292 if a multiline comment. */
293 static int
294 skip_line_comment (pfile)
295 cpp_reader *pfile;
297 cpp_buffer *buffer = pfile->buffer;
298 unsigned int orig_line = pfile->line;
300 while (*buffer->cur != '\n')
301 buffer->cur++;
303 _cpp_process_line_notes (pfile, true);
304 return orig_line != pfile->line;
307 /* Skips whitespace, saving the next non-whitespace character. */
308 static void
309 skip_whitespace (pfile, c)
310 cpp_reader *pfile;
311 cppchar_t c;
313 cpp_buffer *buffer = pfile->buffer;
314 bool saw_NUL = false;
318 /* Horizontal space always OK. */
319 if (c == ' ' || c == '\t')
321 /* Just \f \v or \0 left. */
322 else if (c == '\0')
323 saw_NUL = true;
324 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
325 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
326 CPP_BUF_COL (buffer),
327 "%s in preprocessing directive",
328 c == '\f' ? "form feed" : "vertical tab");
330 c = *buffer->cur++;
332 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
333 while (is_nvspace (c));
335 if (saw_NUL)
336 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
338 buffer->cur--;
341 /* See if the characters of a number token are valid in a name (no
342 '.', '+' or '-'). */
343 static int
344 name_p (pfile, string)
345 cpp_reader *pfile;
346 const cpp_string *string;
348 unsigned int i;
350 for (i = 0; i < string->len; i++)
351 if (!is_idchar (string->text[i]))
352 return 0;
354 return 1;
357 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
358 an identifier. FIRST is TRUE if this starts an identifier. */
359 static bool
360 forms_identifier_p (pfile, first)
361 cpp_reader *pfile;
362 int first;
364 cpp_buffer *buffer = pfile->buffer;
366 if (*buffer->cur == '$')
368 if (!CPP_OPTION (pfile, dollars_in_ident))
369 return false;
371 buffer->cur++;
372 if (CPP_PEDANTIC (pfile)
373 && !pfile->state.skipping
374 && !pfile->warned_dollar)
376 pfile->warned_dollar = true;
377 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
380 return true;
383 /* Is this a syntactically valid UCN? */
384 if (0 && *buffer->cur == '\\'
385 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
387 buffer->cur += 2;
388 if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
389 return true;
390 buffer->cur -= 2;
393 return false;
396 /* Lex an identifier starting at BUFFER->CUR - 1. */
397 static cpp_hashnode *
398 lex_identifier (pfile, base)
399 cpp_reader *pfile;
400 const uchar *base;
402 cpp_hashnode *result;
403 const uchar *cur;
407 cur = pfile->buffer->cur;
409 /* N.B. ISIDNUM does not include $. */
410 while (ISIDNUM (*cur))
411 cur++;
413 pfile->buffer->cur = cur;
415 while (forms_identifier_p (pfile, false));
417 result = (cpp_hashnode *)
418 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
420 /* Rarely, identifiers require diagnostics when lexed. */
421 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
422 && !pfile->state.skipping, 0))
424 /* It is allowed to poison the same identifier twice. */
425 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
426 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
427 NODE_NAME (result));
429 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
430 replacement list of a variadic macro. */
431 if (result == pfile->spec_nodes.n__VA_ARGS__
432 && !pfile->state.va_args_ok)
433 cpp_error (pfile, DL_PEDWARN,
434 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
437 return result;
440 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
441 static void
442 lex_number (pfile, number)
443 cpp_reader *pfile;
444 cpp_string *number;
446 const uchar *cur;
447 const uchar *base;
448 uchar *dest;
450 base = pfile->buffer->cur - 1;
453 cur = pfile->buffer->cur;
455 /* N.B. ISIDNUM does not include $. */
456 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
457 cur++;
459 pfile->buffer->cur = cur;
461 while (forms_identifier_p (pfile, false));
463 number->len = cur - base;
464 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
465 memcpy (dest, base, number->len);
466 dest[number->len] = '\0';
467 number->text = dest;
470 /* Create a token of type TYPE with a literal spelling. */
471 static void
472 create_literal (pfile, token, base, len, type)
473 cpp_reader *pfile;
474 cpp_token *token;
475 const uchar *base;
476 unsigned int len;
477 enum cpp_ttype type;
479 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
481 memcpy (dest, base, len);
482 dest[len] = '\0';
483 token->type = type;
484 token->val.str.len = len;
485 token->val.str.text = dest;
488 /* Lexes a string, character constant, or angle-bracketed header file
489 name. The stored string contains the spelling, including opening
490 quote and leading any leading 'L'. It returns the type of the
491 literal, or CPP_OTHER if it was not properly terminated.
493 The spelling is NUL-terminated, but it is not guaranteed that this
494 is the first NUL since embedded NULs are preserved. */
495 static void
496 lex_string (pfile, token, base)
497 cpp_reader *pfile;
498 cpp_token *token;
499 const uchar *base;
501 bool saw_NUL = false;
502 const uchar *cur;
503 cppchar_t terminator;
504 enum cpp_ttype type;
506 cur = base;
507 terminator = *cur++;
508 if (terminator == 'L')
509 terminator = *cur++;
510 if (terminator == '\"')
511 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
512 else if (terminator == '\'')
513 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
514 else
515 terminator = '>', type = CPP_HEADER_NAME;
517 for (;;)
519 cppchar_t c = *cur++;
521 /* In #include-style directives, terminators are not escapable. */
522 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
523 cur++;
524 else if (c == terminator)
525 break;
526 else if (c == '\n')
528 cur--;
529 type = CPP_OTHER;
530 break;
532 else if (c == '\0')
533 saw_NUL = true;
536 if (saw_NUL && !pfile->state.skipping)
537 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
539 pfile->buffer->cur = cur;
540 create_literal (pfile, token, base, cur - base, type);
543 /* The stored comment includes the comment start and any terminator. */
544 static void
545 save_comment (pfile, token, from, type)
546 cpp_reader *pfile;
547 cpp_token *token;
548 const unsigned char *from;
549 cppchar_t type;
551 unsigned char *buffer;
552 unsigned int len, clen;
554 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
556 /* C++ comments probably (not definitely) have moved past a new
557 line, which we don't want to save in the comment. */
558 if (is_vspace (pfile->buffer->cur[-1]))
559 len--;
561 /* If we are currently in a directive, then we need to store all
562 C++ comments as C comments internally, and so we need to
563 allocate a little extra space in that case.
565 Note that the only time we encounter a directive here is
566 when we are saving comments in a "#define". */
567 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
569 buffer = _cpp_unaligned_alloc (pfile, clen);
571 token->type = CPP_COMMENT;
572 token->val.str.len = clen;
573 token->val.str.text = buffer;
575 buffer[0] = '/';
576 memcpy (buffer + 1, from, len - 1);
578 /* Finish conversion to a C comment, if necessary. */
579 if (pfile->state.in_directive && type == '/')
581 buffer[1] = '*';
582 buffer[clen - 2] = '*';
583 buffer[clen - 1] = '/';
587 /* Allocate COUNT tokens for RUN. */
588 void
589 _cpp_init_tokenrun (run, count)
590 tokenrun *run;
591 unsigned int count;
593 run->base = xnewvec (cpp_token, count);
594 run->limit = run->base + count;
595 run->next = NULL;
598 /* Returns the next tokenrun, or creates one if there is none. */
599 static tokenrun *
600 next_tokenrun (run)
601 tokenrun *run;
603 if (run->next == NULL)
605 run->next = xnew (tokenrun);
606 run->next->prev = run;
607 _cpp_init_tokenrun (run->next, 250);
610 return run->next;
613 /* Allocate a single token that is invalidated at the same time as the
614 rest of the tokens on the line. Has its line and col set to the
615 same as the last lexed token, so that diagnostics appear in the
616 right place. */
617 cpp_token *
618 _cpp_temp_token (pfile)
619 cpp_reader *pfile;
621 cpp_token *old, *result;
623 old = pfile->cur_token - 1;
624 if (pfile->cur_token == pfile->cur_run->limit)
626 pfile->cur_run = next_tokenrun (pfile->cur_run);
627 pfile->cur_token = pfile->cur_run->base;
630 result = pfile->cur_token++;
631 result->line = old->line;
632 result->col = old->col;
633 return result;
636 /* Lex a token into RESULT (external interface). Takes care of issues
637 like directive handling, token lookahead, multiple include
638 optimization and skipping. */
639 const cpp_token *
640 _cpp_lex_token (pfile)
641 cpp_reader *pfile;
643 cpp_token *result;
645 for (;;)
647 if (pfile->cur_token == pfile->cur_run->limit)
649 pfile->cur_run = next_tokenrun (pfile->cur_run);
650 pfile->cur_token = pfile->cur_run->base;
653 if (pfile->lookaheads)
655 pfile->lookaheads--;
656 result = pfile->cur_token++;
658 else
659 result = _cpp_lex_direct (pfile);
661 if (result->flags & BOL)
663 /* Is this a directive. If _cpp_handle_directive returns
664 false, it is an assembler #. */
665 if (result->type == CPP_HASH
666 /* 6.10.3 p 11: Directives in a list of macro arguments
667 gives undefined behavior. This implementation
668 handles the directive as normal. */
669 && pfile->state.parsing_args != 1
670 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
671 continue;
672 if (pfile->cb.line_change && !pfile->state.skipping)
673 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
676 /* We don't skip tokens in directives. */
677 if (pfile->state.in_directive)
678 break;
680 /* Outside a directive, invalidate controlling macros. At file
681 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
682 get here and MI optimisation works. */
683 pfile->mi_valid = false;
685 if (!pfile->state.skipping || result->type == CPP_EOF)
686 break;
689 return result;
692 /* Returns true if a fresh line has been loaded. */
693 bool
694 _cpp_get_fresh_line (pfile)
695 cpp_reader *pfile;
697 /* We can't get a new line until we leave the current directive. */
698 if (pfile->state.in_directive)
699 return false;
701 for (;;)
703 cpp_buffer *buffer = pfile->buffer;
705 if (!buffer->need_line)
706 return true;
708 if (buffer->next_line < buffer->rlimit)
710 _cpp_clean_line (pfile);
711 return true;
714 /* First, get out of parsing arguments state. */
715 if (pfile->state.parsing_args)
716 return false;
718 /* End of buffer. Non-empty files should end in a newline. */
719 if (buffer->buf != buffer->rlimit
720 && buffer->next_line > buffer->rlimit
721 && !buffer->from_stage3)
723 /* Only warn once. */
724 buffer->next_line = buffer->rlimit;
725 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
726 CPP_BUF_COLUMN (buffer, buffer->cur),
727 "no newline at end of file");
730 if (!buffer->prev)
731 return false;
733 if (buffer->return_at_eof)
735 _cpp_pop_buffer (pfile);
736 return false;
739 _cpp_pop_buffer (pfile);
/* Set result->type to THEN_TYPE and consume one character if the next
   character in the buffer is CHAR; otherwise set it to ELSE_TYPE.
   Expects variables named `buffer' and `result' to be in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
752 /* Lex a token into pfile->cur_token, which is also incremented, to
753 get diagnostics pointing to the correct location.
755 Does not handle issues such as token lookahead, multiple-include
756 optimisation, directives, skipping etc. This function is only
757 suitable for use by _cpp_lex_token, and in special cases like
758 lex_expansion_token which doesn't care for any of these issues.
760 When meeting a newline, returns CPP_EOF if parsing a directive,
761 otherwise returns to the start of the token buffer if permissible.
762 Returns the location of the lexed token. */
763 cpp_token *
764 _cpp_lex_direct (pfile)
765 cpp_reader *pfile;
767 cppchar_t c;
768 cpp_buffer *buffer;
769 const unsigned char *comment_start;
770 cpp_token *result = pfile->cur_token++;
772 fresh_line:
773 result->flags = 0;
774 if (pfile->buffer->need_line)
776 if (!_cpp_get_fresh_line (pfile))
778 result->type = CPP_EOF;
779 if (!pfile->state.in_directive)
781 /* Tell the compiler the line number of the EOF token. */
782 result->line = pfile->line;
783 result->flags = BOL;
785 return result;
787 if (!pfile->keep_tokens)
789 pfile->cur_run = &pfile->base_run;
790 result = pfile->base_run.base;
791 pfile->cur_token = result + 1;
793 result->flags = BOL;
794 if (pfile->state.parsing_args == 2)
795 result->flags |= PREV_WHITE;
797 buffer = pfile->buffer;
798 update_tokens_line:
799 result->line = pfile->line;
801 skipped_white:
802 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
803 && !pfile->overlaid_buffer)
805 _cpp_process_line_notes (pfile, false);
806 result->line = pfile->line;
808 c = *buffer->cur++;
809 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
811 switch (c)
813 case ' ': case '\t': case '\f': case '\v': case '\0':
814 result->flags |= PREV_WHITE;
815 skip_whitespace (pfile, c);
816 goto skipped_white;
818 case '\n':
819 pfile->line++;
820 buffer->need_line = true;
821 goto fresh_line;
823 case '0': case '1': case '2': case '3': case '4':
824 case '5': case '6': case '7': case '8': case '9':
825 result->type = CPP_NUMBER;
826 lex_number (pfile, &result->val.str);
827 break;
829 case 'L':
830 /* 'L' may introduce wide characters or strings. */
831 if (*buffer->cur == '\'' || *buffer->cur == '"')
833 lex_string (pfile, result, buffer->cur - 1);
834 break;
836 /* Fall through. */
838 case '_':
839 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
840 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
841 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
842 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
843 case 'y': case 'z':
844 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
845 case 'G': case 'H': case 'I': case 'J': case 'K':
846 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
847 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
848 case 'Y': case 'Z':
849 result->type = CPP_NAME;
850 result->val.node = lex_identifier (pfile, buffer->cur - 1);
852 /* Convert named operators to their proper types. */
853 if (result->val.node->flags & NODE_OPERATOR)
855 result->flags |= NAMED_OP;
856 result->type = result->val.node->directive_index;
858 break;
860 case '\'':
861 case '"':
862 lex_string (pfile, result, buffer->cur - 1);
863 break;
865 case '/':
866 /* A potential block or line comment. */
867 comment_start = buffer->cur;
868 c = *buffer->cur;
870 if (c == '*')
872 if (_cpp_skip_block_comment (pfile))
873 cpp_error (pfile, DL_ERROR, "unterminated comment");
875 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
876 || CPP_IN_SYSTEM_HEADER (pfile)))
878 /* Warn about comments only if pedantically GNUC89, and not
879 in system headers. */
880 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
881 && ! buffer->warned_cplusplus_comments)
883 cpp_error (pfile, DL_PEDWARN,
884 "C++ style comments are not allowed in ISO C90");
885 cpp_error (pfile, DL_PEDWARN,
886 "(this will be reported only once per input file)");
887 buffer->warned_cplusplus_comments = 1;
890 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
891 cpp_error (pfile, DL_WARNING, "multi-line comment");
893 else if (c == '=')
895 buffer->cur++;
896 result->type = CPP_DIV_EQ;
897 break;
899 else
901 result->type = CPP_DIV;
902 break;
905 if (!pfile->state.save_comments)
907 result->flags |= PREV_WHITE;
908 goto update_tokens_line;
911 /* Save the comment as a token in its own right. */
912 save_comment (pfile, result, comment_start, c);
913 break;
915 case '<':
916 if (pfile->state.angled_headers)
918 lex_string (pfile, result, buffer->cur - 1);
919 break;
922 result->type = CPP_LESS;
923 if (*buffer->cur == '=')
924 buffer->cur++, result->type = CPP_LESS_EQ;
925 else if (*buffer->cur == '<')
927 buffer->cur++;
928 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
930 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
932 buffer->cur++;
933 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
935 else if (CPP_OPTION (pfile, digraphs))
937 if (*buffer->cur == ':')
939 buffer->cur++;
940 result->flags |= DIGRAPH;
941 result->type = CPP_OPEN_SQUARE;
943 else if (*buffer->cur == '%')
945 buffer->cur++;
946 result->flags |= DIGRAPH;
947 result->type = CPP_OPEN_BRACE;
950 break;
952 case '>':
953 result->type = CPP_GREATER;
954 if (*buffer->cur == '=')
955 buffer->cur++, result->type = CPP_GREATER_EQ;
956 else if (*buffer->cur == '>')
958 buffer->cur++;
959 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
961 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
963 buffer->cur++;
964 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
966 break;
968 case '%':
969 result->type = CPP_MOD;
970 if (*buffer->cur == '=')
971 buffer->cur++, result->type = CPP_MOD_EQ;
972 else if (CPP_OPTION (pfile, digraphs))
974 if (*buffer->cur == ':')
976 buffer->cur++;
977 result->flags |= DIGRAPH;
978 result->type = CPP_HASH;
979 if (*buffer->cur == '%' && buffer->cur[1] == ':')
980 buffer->cur += 2, result->type = CPP_PASTE;
982 else if (*buffer->cur == '>')
984 buffer->cur++;
985 result->flags |= DIGRAPH;
986 result->type = CPP_CLOSE_BRACE;
989 break;
991 case '.':
992 result->type = CPP_DOT;
993 if (ISDIGIT (*buffer->cur))
995 result->type = CPP_NUMBER;
996 lex_number (pfile, &result->val.str);
998 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
999 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1000 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1001 buffer->cur++, result->type = CPP_DOT_STAR;
1002 break;
1004 case '+':
1005 result->type = CPP_PLUS;
1006 if (*buffer->cur == '+')
1007 buffer->cur++, result->type = CPP_PLUS_PLUS;
1008 else if (*buffer->cur == '=')
1009 buffer->cur++, result->type = CPP_PLUS_EQ;
1010 break;
1012 case '-':
1013 result->type = CPP_MINUS;
1014 if (*buffer->cur == '>')
1016 buffer->cur++;
1017 result->type = CPP_DEREF;
1018 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1019 buffer->cur++, result->type = CPP_DEREF_STAR;
1021 else if (*buffer->cur == '-')
1022 buffer->cur++, result->type = CPP_MINUS_MINUS;
1023 else if (*buffer->cur == '=')
1024 buffer->cur++, result->type = CPP_MINUS_EQ;
1025 break;
1027 case '&':
1028 result->type = CPP_AND;
1029 if (*buffer->cur == '&')
1030 buffer->cur++, result->type = CPP_AND_AND;
1031 else if (*buffer->cur == '=')
1032 buffer->cur++, result->type = CPP_AND_EQ;
1033 break;
1035 case '|':
1036 result->type = CPP_OR;
1037 if (*buffer->cur == '|')
1038 buffer->cur++, result->type = CPP_OR_OR;
1039 else if (*buffer->cur == '=')
1040 buffer->cur++, result->type = CPP_OR_EQ;
1041 break;
1043 case ':':
1044 result->type = CPP_COLON;
1045 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1046 buffer->cur++, result->type = CPP_SCOPE;
1047 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1049 buffer->cur++;
1050 result->flags |= DIGRAPH;
1051 result->type = CPP_CLOSE_SQUARE;
1053 break;
1055 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1056 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1057 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1058 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1059 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1061 case '?': result->type = CPP_QUERY; break;
1062 case '~': result->type = CPP_COMPL; break;
1063 case ',': result->type = CPP_COMMA; break;
1064 case '(': result->type = CPP_OPEN_PAREN; break;
1065 case ')': result->type = CPP_CLOSE_PAREN; break;
1066 case '[': result->type = CPP_OPEN_SQUARE; break;
1067 case ']': result->type = CPP_CLOSE_SQUARE; break;
1068 case '{': result->type = CPP_OPEN_BRACE; break;
1069 case '}': result->type = CPP_CLOSE_BRACE; break;
1070 case ';': result->type = CPP_SEMICOLON; break;
1072 /* @ is a punctuator in Objective-C. */
1073 case '@': result->type = CPP_ATSIGN; break;
1075 case '$':
1076 case '\\':
1078 const uchar *base = --buffer->cur;
1080 if (forms_identifier_p (pfile, true))
1082 result->type = CPP_NAME;
1083 result->val.node = lex_identifier (pfile, base);
1084 break;
1086 buffer->cur++;
1089 default:
1090 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1091 break;
1094 return result;
1097 /* An upper bound on the number of bytes needed to spell TOKEN.
1098 Does not include preceding whitespace. */
1099 unsigned int
1100 cpp_token_len (token)
1101 const cpp_token *token;
1103 unsigned int len;
1105 switch (TOKEN_SPELL (token))
1107 default: len = 4; break;
1108 case SPELL_LITERAL: len = token->val.str.len; break;
1109 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1112 return len;
1115 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1116 already contain the enough space to hold the token's spelling.
1117 Returns a pointer to the character after the last character
1118 written. */
1119 unsigned char *
1120 cpp_spell_token (pfile, token, buffer)
1121 cpp_reader *pfile; /* Would be nice to be rid of this... */
1122 const cpp_token *token;
1123 unsigned char *buffer;
1125 switch (TOKEN_SPELL (token))
1127 case SPELL_OPERATOR:
1129 const unsigned char *spelling;
1130 unsigned char c;
1132 if (token->flags & DIGRAPH)
1133 spelling
1134 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1135 else if (token->flags & NAMED_OP)
1136 goto spell_ident;
1137 else
1138 spelling = TOKEN_NAME (token);
1140 while ((c = *spelling++) != '\0')
1141 *buffer++ = c;
1143 break;
1145 spell_ident:
1146 case SPELL_IDENT:
1147 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1148 buffer += NODE_LEN (token->val.node);
1149 break;
1151 case SPELL_LITERAL:
1152 memcpy (buffer, token->val.str.text, token->val.str.len);
1153 buffer += token->val.str.len;
1154 break;
1156 case SPELL_NONE:
1157 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1158 break;
1161 return buffer;
1164 /* Returns TOKEN spelt as a null-terminated string. The string is
1165 freed when the reader is destroyed. Useful for diagnostics. */
1166 unsigned char *
1167 cpp_token_as_text (pfile, token)
1168 cpp_reader *pfile;
1169 const cpp_token *token;
1171 unsigned int len = cpp_token_len (token) + 1;
1172 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1174 end = cpp_spell_token (pfile, token, start);
1175 end[0] = '\0';
1177 return start;
1180 /* Used by C front ends, which really should move to using
1181 cpp_token_as_text. */
1182 const char *
1183 cpp_type2name (type)
1184 enum cpp_ttype type;
1186 return (const char *) token_spellings[type].name;
1189 /* Writes the spelling of token to FP, without any preceding space.
1190 Separated from cpp_spell_token for efficiency - to avoid stdio
1191 double-buffering. */
1192 void
1193 cpp_output_token (token, fp)
1194 const cpp_token *token;
1195 FILE *fp;
1197 switch (TOKEN_SPELL (token))
1199 case SPELL_OPERATOR:
1201 const unsigned char *spelling;
1202 int c;
1204 if (token->flags & DIGRAPH)
1205 spelling
1206 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1207 else if (token->flags & NAMED_OP)
1208 goto spell_ident;
1209 else
1210 spelling = TOKEN_NAME (token);
1212 c = *spelling;
1214 putc (c, fp);
1215 while ((c = *++spelling) != '\0');
1217 break;
1219 spell_ident:
1220 case SPELL_IDENT:
1221 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1222 break;
1224 case SPELL_LITERAL:
1225 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1226 break;
1228 case SPELL_NONE:
1229 /* An error, most probably. */
1230 break;
1234 /* Compare two tokens. */
1236 _cpp_equiv_tokens (a, b)
1237 const cpp_token *a, *b;
1239 if (a->type == b->type && a->flags == b->flags)
1240 switch (TOKEN_SPELL (a))
1242 default: /* Keep compiler happy. */
1243 case SPELL_OPERATOR:
1244 return 1;
1245 case SPELL_NONE:
1246 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1247 case SPELL_IDENT:
1248 return a->val.node == b->val.node;
1249 case SPELL_LITERAL:
1250 return (a->val.str.len == b->val.str.len
1251 && !memcmp (a->val.str.text, b->val.str.text,
1252 a->val.str.len));
1255 return 0;
1258 /* Returns nonzero if a space should be inserted to avoid an
1259 accidental token paste for output. For simplicity, it is
1260 conservative, and occasionally advises a space where one is not
1261 needed, e.g. "." and ".2". */
1263 cpp_avoid_paste (pfile, token1, token2)
1264 cpp_reader *pfile;
1265 const cpp_token *token1, *token2;
1267 enum cpp_ttype a = token1->type, b = token2->type;
1268 cppchar_t c;
1270 if (token1->flags & NAMED_OP)
1271 a = CPP_NAME;
1272 if (token2->flags & NAMED_OP)
1273 b = CPP_NAME;
1275 c = EOF;
1276 if (token2->flags & DIGRAPH)
1277 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1278 else if (token_spellings[b].category == SPELL_OPERATOR)
1279 c = token_spellings[b].name[0];
1281 /* Quickly get everything that can paste with an '='. */
1282 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1283 return 1;
1285 switch (a)
1287 case CPP_GREATER: return c == '>' || c == '?';
1288 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1289 case CPP_PLUS: return c == '+';
1290 case CPP_MINUS: return c == '-' || c == '>';
1291 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1292 case CPP_MOD: return c == ':' || c == '>';
1293 case CPP_AND: return c == '&';
1294 case CPP_OR: return c == '|';
1295 case CPP_COLON: return c == ':' || c == '>';
1296 case CPP_DEREF: return c == '*';
1297 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1298 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1299 case CPP_NAME: return ((b == CPP_NUMBER
1300 && name_p (pfile, &token2->val.str))
1301 || b == CPP_NAME
1302 || b == CPP_CHAR || b == CPP_STRING); /* L */
1303 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1304 || c == '.' || c == '+' || c == '-');
1305 /* UCNs */
1306 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1307 && b == CPP_NAME)
1308 || (CPP_OPTION (pfile, objc)
1309 && token1->val.str.text[0] == '@'
1310 && (b == CPP_NAME || b == CPP_STRING)));
1311 default: break;
1314 return 0;
1317 /* Output all the remaining tokens on the current line, and a newline
1318 character, to FP. Leading whitespace is removed. If there are
1319 macros, special token padding is not performed. */
1320 void
1321 cpp_output_line (pfile, fp)
1322 cpp_reader *pfile;
1323 FILE *fp;
1325 const cpp_token *token;
1327 token = cpp_get_token (pfile);
1328 while (token->type != CPP_EOF)
1330 cpp_output_token (token, fp);
1331 token = cpp_get_token (pfile);
1332 if (token->flags & PREV_WHITE)
1333 putc (' ', fp);
1336 putc ('\n', fp);
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1350 /* Read a possible universal character name starting at *PSTR. */
1351 static cppchar_t
1352 maybe_read_ucn (pfile, pstr)
1353 cpp_reader *pfile;
1354 const uchar **pstr;
1356 cppchar_t result, c = (*pstr)[-1];
1358 result = _cpp_valid_ucn (pfile, pstr, false);
1359 if (result)
1361 if (CPP_WTRADITIONAL (pfile))
1362 cpp_error (pfile, DL_WARNING,
1363 "the meaning of '\\%c' is different in traditional C",
1364 (int) c);
1366 if (CPP_OPTION (pfile, EBCDIC))
1368 cpp_error (pfile, DL_ERROR,
1369 "universal character with an EBCDIC target");
1370 result = 0x3f; /* EBCDIC invalid character */
1374 return result;
1377 /* Returns the value of an escape sequence, truncated to the correct
1378 target precision. PSTR points to the input pointer, which is just
1379 after the backslash. LIMIT is how much text we have. WIDE is true
1380 if the escape sequence is part of a wide character constant or
1381 string literal. Handles all relevant diagnostics. */
1382 cppchar_t
1383 cpp_parse_escape (pfile, pstr, limit, wide)
1384 cpp_reader *pfile;
1385 const unsigned char **pstr;
1386 const unsigned char *limit;
1387 int wide;
1389 /* Values of \a \b \e \f \n \r \t \v respectively. */
1390 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1391 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1393 int unknown = 0;
1394 const unsigned char *str = *pstr, *charconsts;
1395 cppchar_t c, ucn, mask;
1396 unsigned int width;
1398 if (CPP_OPTION (pfile, EBCDIC))
1399 charconsts = ebcdic;
1400 else
1401 charconsts = ascii;
1403 if (wide)
1404 width = CPP_OPTION (pfile, wchar_precision);
1405 else
1406 width = CPP_OPTION (pfile, char_precision);
1407 if (width < BITS_PER_CPPCHAR_T)
1408 mask = ((cppchar_t) 1 << width) - 1;
1409 else
1410 mask = ~0;
1412 c = *str++;
1413 switch (c)
1415 case '\\': case '\'': case '"': case '?': break;
1416 case 'b': c = charconsts[1]; break;
1417 case 'f': c = charconsts[3]; break;
1418 case 'n': c = charconsts[4]; break;
1419 case 'r': c = charconsts[5]; break;
1420 case 't': c = charconsts[6]; break;
1421 case 'v': c = charconsts[7]; break;
1423 case '(': case '{': case '[': case '%':
1424 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1425 '\%' is used to prevent SCCS from getting confused. */
1426 unknown = CPP_PEDANTIC (pfile);
1427 break;
1429 case 'a':
1430 if (CPP_WTRADITIONAL (pfile))
1431 cpp_error (pfile, DL_WARNING,
1432 "the meaning of '\\a' is different in traditional C");
1433 c = charconsts[0];
1434 break;
1436 case 'e': case 'E':
1437 if (CPP_PEDANTIC (pfile))
1438 cpp_error (pfile, DL_PEDWARN,
1439 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1440 c = charconsts[2];
1441 break;
1443 case 'u': case 'U':
1444 ucn = maybe_read_ucn (pfile, &str);
1445 if (ucn)
1446 c = ucn;
1447 else
1448 unknown = true;
1449 break;
1451 case 'x':
1452 if (CPP_WTRADITIONAL (pfile))
1453 cpp_error (pfile, DL_WARNING,
1454 "the meaning of '\\x' is different in traditional C");
1457 cppchar_t i = 0, overflow = 0;
1458 int digits_found = 0;
1460 while (str < limit)
1462 c = *str;
1463 if (! ISXDIGIT (c))
1464 break;
1465 str++;
1466 overflow |= i ^ (i << 4 >> 4);
1467 i = (i << 4) + hex_digit_value (c);
1468 digits_found = 1;
1471 if (!digits_found)
1472 cpp_error (pfile, DL_ERROR,
1473 "\\x used with no following hex digits");
1475 if (overflow | (i != (i & mask)))
1477 cpp_error (pfile, DL_PEDWARN,
1478 "hex escape sequence out of range");
1479 i &= mask;
1481 c = i;
1483 break;
1485 case '0': case '1': case '2': case '3':
1486 case '4': case '5': case '6': case '7':
1488 size_t count = 0;
1489 cppchar_t i = c - '0';
1491 while (str < limit && ++count < 3)
1493 c = *str;
1494 if (c < '0' || c > '7')
1495 break;
1496 str++;
1497 i = (i << 3) + c - '0';
1500 if (i != (i & mask))
1502 cpp_error (pfile, DL_PEDWARN,
1503 "octal escape sequence out of range");
1504 i &= mask;
1506 c = i;
1508 break;
1510 default:
1511 unknown = 1;
1512 break;
1515 if (unknown)
1517 if (ISGRAPH (c))
1518 cpp_error (pfile, DL_PEDWARN,
1519 "unknown escape sequence '\\%c'", (int) c);
1520 else
1521 cpp_error (pfile, DL_PEDWARN,
1522 "unknown escape sequence: '\\%03o'", (int) c);
1525 if (c > mask)
1527 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1528 c &= mask;
1531 *pstr = str;
1532 return c;
1535 /* Interpret a (possibly wide) character constant in TOKEN.
1536 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1537 points to a variable that is filled in with the number of
1538 characters seen, and UNSIGNEDP to a variable that indicates whether
1539 the result has signed type. */
1540 cppchar_t
1541 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1542 cpp_reader *pfile;
1543 const cpp_token *token;
1544 unsigned int *pchars_seen;
1545 int *unsignedp;
1547 const unsigned char *str, *limit;
1548 unsigned int chars_seen = 0;
1549 size_t width, max_chars;
1550 cppchar_t c, mask, result = 0;
1551 bool unsigned_p;
1553 str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1554 limit = token->val.str.text + token->val.str.len - 1;
1556 if (token->type == CPP_CHAR)
1558 width = CPP_OPTION (pfile, char_precision);
1559 max_chars = CPP_OPTION (pfile, int_precision) / width;
1560 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1562 else
1564 width = CPP_OPTION (pfile, wchar_precision);
1565 max_chars = 1;
1566 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1569 if (width < BITS_PER_CPPCHAR_T)
1570 mask = ((cppchar_t) 1 << width) - 1;
1571 else
1572 mask = ~0;
1574 while (str < limit)
1576 c = *str++;
1578 if (c == '\\')
1579 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1581 #ifdef MAP_CHARACTER
1582 if (ISPRINT (c))
1583 c = MAP_CHARACTER (c);
1584 #endif
1586 chars_seen++;
1588 /* Truncate the character, scale the result and merge the two. */
1589 c &= mask;
1590 if (width < BITS_PER_CPPCHAR_T)
1591 result = (result << width) | c;
1592 else
1593 result = c;
1596 if (chars_seen == 0)
1597 cpp_error (pfile, DL_ERROR, "empty character constant");
1598 else if (chars_seen > 1)
1600 /* Multichar charconsts are of type int and therefore signed. */
1601 unsigned_p = 0;
1603 if (chars_seen > max_chars)
1605 chars_seen = max_chars;
1606 cpp_error (pfile, DL_WARNING,
1607 "character constant too long for its type");
1609 else if (CPP_OPTION (pfile, warn_multichar))
1610 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1613 /* Sign-extend or truncate the constant to cppchar_t. The value is
1614 in WIDTH bits, but for multi-char charconsts it's value is the
1615 full target type's width. */
1616 if (chars_seen > 1)
1617 width *= max_chars;
1618 if (width < BITS_PER_CPPCHAR_T)
1620 mask = ((cppchar_t) 1 << width) - 1;
1621 if (unsigned_p || !(result & (1 << (width - 1))))
1622 result &= mask;
1623 else
1624 result |= ~mask;
1627 *pchars_seen = chars_seen;
1628 *unsignedp = unsigned_p;
1629 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still acceptable for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the distance from BUFF's cur to
   its limit.  Every macro argument is parenthesized so that an
   argument containing a low-precedence operator expands correctly.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1647 /* Create a new allocation buffer. Place the control block at the end
1648 of the buffer, so that buffer overflows will cause immediate chaos. */
1649 static _cpp_buff *
1650 new_buff (len)
1651 size_t len;
1653 _cpp_buff *result;
1654 unsigned char *base;
1656 if (len < MIN_BUFF_SIZE)
1657 len = MIN_BUFF_SIZE;
1658 len = CPP_ALIGN (len);
1660 base = xmalloc (len + sizeof (_cpp_buff));
1661 result = (_cpp_buff *) (base + len);
1662 result->base = base;
1663 result->cur = base;
1664 result->limit = base + len;
1665 result->next = NULL;
1666 return result;
1669 /* Place a chain of unwanted allocation buffers on the free list. */
1670 void
1671 _cpp_release_buff (pfile, buff)
1672 cpp_reader *pfile;
1673 _cpp_buff *buff;
1675 _cpp_buff *end = buff;
1677 while (end->next)
1678 end = end->next;
1679 end->next = pfile->free_buffs;
1680 pfile->free_buffs = buff;
1683 /* Return a free buffer of size at least MIN_SIZE. */
1684 _cpp_buff *
1685 _cpp_get_buff (pfile, min_size)
1686 cpp_reader *pfile;
1687 size_t min_size;
1689 _cpp_buff *result, **p;
1691 for (p = &pfile->free_buffs;; p = &(*p)->next)
1693 size_t size;
1695 if (*p == NULL)
1696 return new_buff (min_size);
1697 result = *p;
1698 size = result->limit - result->base;
1699 /* Return a buffer that's big enough, but don't waste one that's
1700 way too big. */
1701 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1702 break;
1705 *p = result->next;
1706 result->next = NULL;
1707 result->cur = result->base;
1708 return result;
1711 /* Creates a new buffer with enough space to hold the uncommitted
1712 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1713 the excess bytes to the new buffer. Chains the new buffer after
1714 BUFF, and returns the new buffer. */
1715 _cpp_buff *
1716 _cpp_append_extend_buff (pfile, buff, min_extra)
1717 cpp_reader *pfile;
1718 _cpp_buff *buff;
1719 size_t min_extra;
1721 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1722 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1724 buff->next = new_buff;
1725 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1726 return new_buff;
1729 /* Creates a new buffer with enough space to hold the uncommitted
1730 remaining bytes of the buffer pointed to by BUFF, and at least
1731 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1732 Chains the new buffer before the buffer pointed to by BUFF, and
1733 updates the pointer to point to the new buffer. */
1734 void
1735 _cpp_extend_buff (pfile, pbuff, min_extra)
1736 cpp_reader *pfile;
1737 _cpp_buff **pbuff;
1738 size_t min_extra;
1740 _cpp_buff *new_buff, *old_buff = *pbuff;
1741 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1743 new_buff = _cpp_get_buff (pfile, size);
1744 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1745 new_buff->next = old_buff;
1746 *pbuff = new_buff;
1749 /* Free a chain of buffers starting at BUFF. */
1750 void
1751 _cpp_free_buff (buff)
1752 _cpp_buff *buff;
1754 _cpp_buff *next;
1756 for (; buff; buff = next)
1758 next = buff->next;
1759 free (buff->base);
1763 /* Allocate permanent, unaligned storage of length LEN. */
1764 unsigned char *
1765 _cpp_unaligned_alloc (pfile, len)
1766 cpp_reader *pfile;
1767 size_t len;
1769 _cpp_buff *buff = pfile->u_buff;
1770 unsigned char *result = buff->cur;
1772 if (len > (size_t) (buff->limit - result))
1774 buff = _cpp_get_buff (pfile, len);
1775 buff->next = pfile->u_buff;
1776 pfile->u_buff = buff;
1777 result = buff->cur;
1780 buff->cur = result + len;
1781 return result;
1784 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1785 That buffer is used for growing allocations when saving macro
1786 replacement lists in a #define, and when parsing an answer to an
1787 assertion in #assert, #unassert or #if (and therefore possibly
1788 whilst expanding macros). It therefore must not be used by any
1789 code that they might call: specifically the lexer and the guts of
1790 the macro expander.
1792 All existing other uses clearly fit this restriction: storing
1793 registered pragmas during initialization. */
1794 unsigned char *
1795 _cpp_aligned_alloc (pfile, len)
1796 cpp_reader *pfile;
1797 size_t len;
1799 _cpp_buff *buff = pfile->a_buff;
1800 unsigned char *result = buff->cur;
1802 if (len > (size_t) (buff->limit - result))
1804 buff = _cpp_get_buff (pfile, len);
1805 buff->next = pfile->a_buff;
1806 pfile->a_buff = buff;
1807 result = buff->cur;
1810 buff->cur = result + len;
1811 return result;