Use 'a' operand code for prefetch instruction.
[official-gcc.git] / gcc / cpplex.c
blob45d28b66ab20444b2c970f6fff3f8d76bf0088e9
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "system.h"
25 #include "cpplib.h"
26 #include "cpphash.h"
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
31 #ifdef CROSS_COMPILE
32 #undef MULTIBYTE_CHARS
33 #endif
35 #ifdef MULTIBYTE_CHARS
36 #include "mbchar.h"
37 #include <locale.h>
38 #endif
/* Spelling category recorded for each token type in the
   token_spellings table.  Tokens with SPELL_STRING store their
   spelling in the token list, and its length in
   token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};

/* One entry of the token_spellings table: the spelling category of a
   token type together with the name stored for it.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
58 static const unsigned char *const digraph_spellings[] =
59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
64 #undef OP
65 #undef TK
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
71 static void handle_newline PARAMS ((cpp_reader *));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
81 const U_CHAR *));
82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
85 static void unterminated PARAMS ((cpp_reader *, int));
86 static bool trigraph_p PARAMS ((cpp_reader *));
87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
93 static unsigned int hex_digit_value PARAMS ((unsigned int));
94 static _cpp_buff *new_buff PARAMS ((size_t));
96 /* Utility routine:
98 Compares, the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
102 cpp_ideq (token, string)
103 const cpp_token *token;
104 const char *string;
106 if (token->type != CPP_NAME)
107 return 0;
109 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
113 Returns with buffer->cur pointing to the character immediately
114 following the newline (combination). */
115 static void
116 handle_newline (pfile)
117 cpp_reader *pfile;
119 cpp_buffer *buffer = pfile->buffer;
121 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
122 only accept CR-LF; maybe we should fall back to that behaviour? */
123 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
124 buffer->cur++;
126 buffer->line_base = buffer->cur;
127 buffer->col_adjust = 0;
128 pfile->line++;
131 /* Subroutine of skip_escaped_newlines; called when a 3-character
132 sequence beginning with "??" is encountered. buffer->cur points to
133 the second '?'.
135 Warn if necessary, and returns true if the sequence forms a
136 trigraph and the trigraph should be honoured. */
137 static bool
138 trigraph_p (pfile)
139 cpp_reader *pfile;
141 cpp_buffer *buffer = pfile->buffer;
142 cppchar_t from_char = buffer->cur[1];
143 bool accept;
145 if (!_cpp_trigraph_map[from_char])
146 return false;
148 accept = CPP_OPTION (pfile, trigraphs);
150 /* Don't warn about trigraphs in comments. */
151 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
153 if (accept)
154 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
155 "trigraph ??%c converted to %c",
156 (int) from_char,
157 (int) _cpp_trigraph_map[from_char]);
158 else if (buffer->cur != buffer->last_Wtrigraphs)
160 buffer->last_Wtrigraphs = buffer->cur;
161 cpp_warning_with_line (pfile, pfile->line,
162 CPP_BUF_COL (buffer) - 1,
163 "trigraph ??%c ignored", (int) from_char);
167 return accept;
170 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
171 lie in buffer->cur[-1]. Returns the next byte, which will be in
172 buffer->cur[-1]. This routine performs preprocessing stages 1 and
173 2 of the ISO C standard. */
174 static cppchar_t
175 skip_escaped_newlines (pfile)
176 cpp_reader *pfile;
178 cpp_buffer *buffer = pfile->buffer;
179 cppchar_t next = buffer->cur[-1];
181 /* Only do this if we apply stages 1 and 2. */
182 if (!buffer->from_stage3)
184 const unsigned char *saved_cur;
185 cppchar_t next1;
189 if (next == '?')
191 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
192 break;
194 /* Translate the trigraph. */
195 next = _cpp_trigraph_map[buffer->cur[1]];
196 buffer->cur += 2;
197 if (next != '\\')
198 break;
201 if (buffer->cur == buffer->rlimit)
202 break;
204 /* We have a backslash, and room for at least one more
205 character. Skip horizontal whitespace. */
206 saved_cur = buffer->cur;
208 next1 = *buffer->cur++;
209 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
211 if (!is_vspace (next1))
213 buffer->cur = saved_cur;
214 break;
217 if (saved_cur != buffer->cur - 1
218 && !pfile->state.lexing_comment)
219 cpp_warning (pfile, "backslash and newline separated by space");
221 handle_newline (pfile);
222 buffer->backup_to = buffer->cur;
223 if (buffer->cur == buffer->rlimit)
225 cpp_pedwarn (pfile, "backslash-newline at end of file");
226 next = EOF;
228 else
229 next = *buffer->cur++;
231 while (next == '\\' || next == '?');
234 return next;
237 /* Obtain the next character, after trigraph conversion and skipping
238 an arbitrarily long string of escaped newlines. The common case of
239 no trigraphs or escaped newlines falls through quickly. On return,
240 buffer->backup_to points to where to return to if the character is
241 not to be processed. */
242 static cppchar_t
243 get_effective_char (pfile)
244 cpp_reader *pfile;
246 cppchar_t next;
247 cpp_buffer *buffer = pfile->buffer;
249 buffer->backup_to = buffer->cur;
250 next = *buffer->cur++;
251 if (__builtin_expect (next == '?' || next == '\\', 0))
252 next = skip_escaped_newlines (pfile);
254 return next;
257 /* Skip a C-style block comment. We find the end of the comment by
258 seeing if an asterisk is before every '/' we encounter. Returns
259 non-zero if comment terminated by EOF, zero otherwise. */
260 static int
261 skip_block_comment (pfile)
262 cpp_reader *pfile;
264 cpp_buffer *buffer = pfile->buffer;
265 cppchar_t c = EOF, prevc = EOF;
267 pfile->state.lexing_comment = 1;
268 while (buffer->cur != buffer->rlimit)
270 prevc = c, c = *buffer->cur++;
272 /* FIXME: For speed, create a new character class of characters
273 of interest inside block comments. */
274 if (c == '?' || c == '\\')
275 c = skip_escaped_newlines (pfile);
277 /* People like decorating comments with '*', so check for '/'
278 instead for efficiency. */
279 if (c == '/')
281 if (prevc == '*')
282 break;
284 /* Warn about potential nested comments, but not if the '/'
285 comes immediately before the true comment delimiter.
286 Don't bother to get it right across escaped newlines. */
287 if (CPP_OPTION (pfile, warn_comments)
288 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
289 cpp_warning_with_line (pfile,
290 pfile->line, CPP_BUF_COL (buffer),
291 "\"/*\" within comment");
293 else if (is_vspace (c))
294 handle_newline (pfile);
295 else if (c == '\t')
296 adjust_column (pfile);
299 pfile->state.lexing_comment = 0;
300 return c != '/' || prevc != '*';
303 /* Skip a C++ line comment, leaving buffer->cur pointing to the
304 terminating newline. Handles escaped newlines. Returns non-zero
305 if a multiline comment. */
306 static int
307 skip_line_comment (pfile)
308 cpp_reader *pfile;
310 cpp_buffer *buffer = pfile->buffer;
311 unsigned int orig_line = pfile->line;
312 cppchar_t c;
314 pfile->state.lexing_comment = 1;
317 if (buffer->cur == buffer->rlimit)
318 goto at_eof;
320 c = *buffer->cur++;
321 if (c == '?' || c == '\\')
322 c = skip_escaped_newlines (pfile);
324 while (!is_vspace (c));
326 /* Step back over the newline, except at EOF. */
327 buffer->cur--;
328 at_eof:
330 pfile->state.lexing_comment = 0;
331 return orig_line != pfile->line;
334 /* pfile->buffer->cur is one beyond the \t character. Update
335 col_adjust so we track the column correctly. */
336 static void
337 adjust_column (pfile)
338 cpp_reader *pfile;
340 cpp_buffer *buffer = pfile->buffer;
341 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
343 /* Round it up to multiple of the tabstop, but subtract 1 since the
344 tab itself occupies a character position. */
345 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
346 - col % CPP_OPTION (pfile, tabstop)) - 1;
349 /* Skips whitespace, saving the next non-whitespace character.
350 Adjusts pfile->col_adjust to account for tabs. Without this,
351 tokens might be assigned an incorrect column. */
352 static int
353 skip_whitespace (pfile, c)
354 cpp_reader *pfile;
355 cppchar_t c;
357 cpp_buffer *buffer = pfile->buffer;
358 unsigned int warned = 0;
362 /* Horizontal space always OK. */
363 if (c == ' ')
365 else if (c == '\t')
366 adjust_column (pfile);
367 /* Just \f \v or \0 left. */
368 else if (c == '\0')
370 if (buffer->cur - 1 == buffer->rlimit)
371 return 0;
372 if (!warned)
374 cpp_warning (pfile, "null character(s) ignored");
375 warned = 1;
378 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
379 cpp_pedwarn_with_line (pfile, pfile->line,
380 CPP_BUF_COL (buffer),
381 "%s in preprocessing directive",
382 c == '\f' ? "form feed" : "vertical tab");
384 c = *buffer->cur++;
386 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
387 while (is_nvspace (c));
389 buffer->cur--;
390 return 1;
393 /* See if the characters of a number token are valid in a name (no
394 '.', '+' or '-'). */
395 static int
396 name_p (pfile, string)
397 cpp_reader *pfile;
398 const cpp_string *string;
400 unsigned int i;
402 for (i = 0; i < string->len; i++)
403 if (!is_idchar (string->text[i]))
404 return 0;
406 return 1;
409 /* Parse an identifier, skipping embedded backslash-newlines. This is
410 a critical inner loop. The common case is an identifier which has
411 not been split by backslash-newline, does not contain a dollar
412 sign, and has already been scanned (roughly 10:1 ratio of
413 seen:unseen identifiers in normal code; the distribution is
414 Poisson-like). Second most common case is a new identifier, not
415 split and no dollar sign. The other possibilities are rare and
416 have been relegated to parse_identifier_slow. */
418 static cpp_hashnode *
419 parse_identifier (pfile)
420 cpp_reader *pfile;
422 cpp_hashnode *result;
423 const U_CHAR *cur;
425 /* Fast-path loop. Skim over a normal identifier.
426 N.B. ISIDNUM does not include $. */
427 cur = pfile->buffer->cur;
428 while (ISIDNUM (*cur))
429 cur++;
431 /* Check for slow-path cases. */
432 if (*cur == '?' || *cur == '\\' || *cur == '$')
433 result = parse_identifier_slow (pfile, cur);
434 else
436 const U_CHAR *base = pfile->buffer->cur - 1;
437 result = (cpp_hashnode *)
438 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
439 pfile->buffer->cur = cur;
442 /* Rarely, identifiers require diagnostics when lexed.
443 XXX Has to be forced out of the fast path. */
444 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
445 && !pfile->state.skipping, 0))
447 /* It is allowed to poison the same identifier twice. */
448 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
449 cpp_error (pfile, "attempt to use poisoned \"%s\"",
450 NODE_NAME (result));
452 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
453 replacement list of a variadic macro. */
454 if (result == pfile->spec_nodes.n__VA_ARGS__
455 && !pfile->state.va_args_ok)
456 cpp_pedwarn (pfile,
457 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
460 return result;
463 /* Slow path. This handles identifiers which have been split, and
464 identifiers which contain dollar signs. The part of the identifier
465 from PFILE->buffer->cur-1 to CUR has already been scanned. */
466 static cpp_hashnode *
467 parse_identifier_slow (pfile, cur)
468 cpp_reader *pfile;
469 const U_CHAR *cur;
471 cpp_buffer *buffer = pfile->buffer;
472 const U_CHAR *base = buffer->cur - 1;
473 struct obstack *stack = &pfile->hash_table->stack;
474 unsigned int c, saw_dollar = 0, len;
476 /* Copy the part of the token which is known to be okay. */
477 obstack_grow (stack, base, cur - base);
479 /* Now process the part which isn't. We are looking at one of
480 '$', '\\', or '?' on entry to this loop. */
481 c = *cur++;
482 buffer->cur = cur;
485 while (is_idchar (c))
487 obstack_1grow (stack, c);
489 if (c == '$')
490 saw_dollar++;
492 c = *buffer->cur++;
495 /* Potential escaped newline? */
496 buffer->backup_to = buffer->cur - 1;
497 if (c != '?' && c != '\\')
498 break;
499 c = skip_escaped_newlines (pfile);
501 while (is_idchar (c));
503 /* Step back over the unwanted char. */
504 BACKUP ();
506 /* $ is not an identifier character in the standard, but is commonly
507 accepted as an extension. Don't warn about it in skipped
508 conditional blocks. */
509 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
510 cpp_pedwarn (pfile, "'$' character(s) in identifier");
512 /* Identifiers are null-terminated. */
513 len = obstack_object_size (stack);
514 obstack_1grow (stack, '\0');
516 return (cpp_hashnode *)
517 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
520 /* Parse a number, skipping embedded backslash-newlines. */
521 static void
522 parse_number (pfile, number, c, leading_period)
523 cpp_reader *pfile;
524 cpp_string *number;
525 cppchar_t c;
526 int leading_period;
528 cpp_buffer *buffer = pfile->buffer;
529 unsigned char *dest, *limit;
531 dest = BUFF_FRONT (pfile->u_buff);
532 limit = BUFF_LIMIT (pfile->u_buff);
534 /* Place a leading period. */
535 if (leading_period)
537 if (dest == limit)
539 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
540 dest = BUFF_FRONT (pfile->u_buff);
541 limit = BUFF_LIMIT (pfile->u_buff);
543 *dest++ = '.';
550 /* Need room for terminating null. */
551 if ((size_t) (limit - dest) < 2)
553 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
554 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
555 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
556 limit = BUFF_LIMIT (pfile->u_buff);
558 *dest++ = c;
560 c = *buffer->cur++;
562 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
564 /* Potential escaped newline? */
565 buffer->backup_to = buffer->cur - 1;
566 if (c != '?' && c != '\\')
567 break;
568 c = skip_escaped_newlines (pfile);
570 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
572 /* Step back over the unwanted char. */
573 BACKUP ();
575 /* Null-terminate the number. */
576 *dest = '\0';
578 number->text = BUFF_FRONT (pfile->u_buff);
579 number->len = dest - number->text;
580 BUFF_FRONT (pfile->u_buff) = dest + 1;
583 /* Subroutine of parse_string. Emits error for unterminated strings. */
584 static void
585 unterminated (pfile, term)
586 cpp_reader *pfile;
587 int term;
589 cpp_error (pfile, "missing terminating %c character", term);
591 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
593 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
594 "possible start of unterminated string literal");
595 pfile->mls_line = 0;
599 /* Subroutine of parse_string. */
600 static int
601 unescaped_terminator_p (pfile, dest)
602 cpp_reader *pfile;
603 const unsigned char *dest;
605 const unsigned char *start, *temp;
607 /* In #include-style directives, terminators are not escapeable. */
608 if (pfile->state.angled_headers)
609 return 1;
611 start = BUFF_FRONT (pfile->u_buff);
613 /* An odd number of consecutive backslashes represents an escaped
614 terminator. */
615 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
618 return ((dest - temp) & 1) == 0;
621 /* Parses a string, character constant, or angle-bracketed header file
622 name. Handles embedded trigraphs and escaped newlines. The stored
623 string is guaranteed NUL-terminated, but it is not guaranteed that
624 this is the first NUL since embedded NULs are preserved.
625 Multi-line strings are allowed, but they are deprecated.
627 When this function returns, buffer->cur points to the next
628 character to be processed. */
629 static void
630 parse_string (pfile, token, terminator)
631 cpp_reader *pfile;
632 cpp_token *token;
633 cppchar_t terminator;
635 cpp_buffer *buffer = pfile->buffer;
636 unsigned char *dest, *limit;
637 cppchar_t c;
638 bool warned_nulls = false, warned_multi = false;
640 dest = BUFF_FRONT (pfile->u_buff);
641 limit = BUFF_LIMIT (pfile->u_buff);
643 for (;;)
645 /* We need room for another char, possibly the terminating NUL. */
646 if ((size_t) (limit - dest) < 1)
648 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
649 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
650 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
651 limit = BUFF_LIMIT (pfile->u_buff);
654 /* Handle trigraphs, escaped newlines etc. */
655 c = *buffer->cur++;
656 if (c == '?' || c == '\\')
657 c = skip_escaped_newlines (pfile);
659 if (c == terminator)
661 if (unescaped_terminator_p (pfile, dest))
662 break;
664 else if (is_vspace (c))
666 /* In assembly language, silently terminate string and
667 character literals at end of line. This is a kludge
668 around not knowing where comments are. */
669 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
671 buffer->cur--;
672 break;
675 /* Character constants and header names may not extend over
676 multiple lines. In Standard C, neither may strings.
677 Unfortunately, we accept multiline strings as an
678 extension, except in #include family directives. */
679 if (terminator != '"' || pfile->state.angled_headers)
681 unterminated (pfile, terminator);
682 buffer->cur--;
683 break;
686 if (!warned_multi)
688 warned_multi = true;
689 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
692 if (pfile->mls_line == 0)
694 pfile->mls_line = token->line;
695 pfile->mls_col = token->col;
698 handle_newline (pfile);
699 c = '\n';
701 else if (c == '\0')
703 if (buffer->cur - 1 == buffer->rlimit)
705 unterminated (pfile, terminator);
706 buffer->cur--;
707 break;
709 if (!warned_nulls)
711 warned_nulls = true;
712 cpp_warning (pfile, "null character(s) preserved in literal");
716 *dest++ = c;
719 *dest = '\0';
721 token->val.str.text = BUFF_FRONT (pfile->u_buff);
722 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
723 BUFF_FRONT (pfile->u_buff) = dest + 1;
726 /* The stored comment includes the comment start and any terminator. */
727 static void
728 save_comment (pfile, token, from)
729 cpp_reader *pfile;
730 cpp_token *token;
731 const unsigned char *from;
733 unsigned char *buffer;
734 unsigned int len;
736 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
738 /* C++ comments probably (not definitely) have moved past a new
739 line, which we don't want to save in the comment. */
740 if (is_vspace (pfile->buffer->cur[-1]))
741 len--;
742 buffer = _cpp_unaligned_alloc (pfile, len);
744 token->type = CPP_COMMENT;
745 token->val.str.len = len;
746 token->val.str.text = buffer;
748 buffer[0] = '/';
749 memcpy (buffer + 1, from, len - 1);
752 /* Allocate COUNT tokens for RUN. */
753 void
754 _cpp_init_tokenrun (run, count)
755 tokenrun *run;
756 unsigned int count;
758 run->base = xnewvec (cpp_token, count);
759 run->limit = run->base + count;
760 run->next = NULL;
763 /* Returns the next tokenrun, or creates one if there is none. */
764 static tokenrun *
765 next_tokenrun (run)
766 tokenrun *run;
768 if (run->next == NULL)
770 run->next = xnew (tokenrun);
771 run->next->prev = run;
772 _cpp_init_tokenrun (run->next, 250);
775 return run->next;
778 /* Allocate a single token that is invalidated at the same time as the
779 rest of the tokens on the line. Has its line and col set to the
780 same as the last lexed token, so that diagnostics appear in the
781 right place. */
782 cpp_token *
783 _cpp_temp_token (pfile)
784 cpp_reader *pfile;
786 cpp_token *old, *result;
788 old = pfile->cur_token - 1;
789 if (pfile->cur_token == pfile->cur_run->limit)
791 pfile->cur_run = next_tokenrun (pfile->cur_run);
792 pfile->cur_token = pfile->cur_run->base;
795 result = pfile->cur_token++;
796 result->line = old->line;
797 result->col = old->col;
798 return result;
801 /* Lex a token into RESULT (external interface). Takes care of issues
802 like directive handling, token lookahead, multiple include
803 optimization and skipping. */
804 const cpp_token *
805 _cpp_lex_token (pfile)
806 cpp_reader *pfile;
808 cpp_token *result;
810 for (;;)
812 if (pfile->cur_token == pfile->cur_run->limit)
814 pfile->cur_run = next_tokenrun (pfile->cur_run);
815 pfile->cur_token = pfile->cur_run->base;
818 if (pfile->lookaheads)
820 pfile->lookaheads--;
821 result = pfile->cur_token++;
823 else
824 result = _cpp_lex_direct (pfile);
826 if (result->flags & BOL)
828 /* Is this a directive. If _cpp_handle_directive returns
829 false, it is an assembler #. */
830 if (result->type == CPP_HASH
831 && !pfile->state.parsing_args
832 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
833 continue;
834 if (pfile->cb.line_change && !pfile->state.skipping)
835 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
838 /* We don't skip tokens in directives. */
839 if (pfile->state.in_directive)
840 break;
842 /* Outside a directive, invalidate controlling macros. At file
843 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
844 get here and MI optimisation works. */
845 pfile->mi_valid = false;
847 if (!pfile->state.skipping || result->type == CPP_EOF)
848 break;
851 return result;
/* Set result->type to THEN_TYPE if the next effective character is
   CHAR; otherwise back up over it and set result->type to ELSE_TYPE.
   Requires `pfile', `buffer' and `result' to be in scope.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) == CHAR)     \
      result->type = THEN_TYPE;                 \
    else                                        \
      {                                         \
        BACKUP ();                              \
        result->type = ELSE_TYPE;               \
      }                                         \
  } while (0)
865 /* Lex a token into pfile->cur_token, which is also incremented, to
866 get diagnostics pointing to the correct location.
868 Does not handle issues such as token lookahead, multiple-include
869 optimisation, directives, skipping etc. This function is only
870 suitable for use by _cpp_lex_token, and in special cases like
871 lex_expansion_token which doesn't care for any of these issues.
873 When meeting a newline, returns CPP_EOF if parsing a directive,
874 otherwise returns to the start of the token buffer if permissible.
875 Returns the location of the lexed token. */
876 cpp_token *
877 _cpp_lex_direct (pfile)
878 cpp_reader *pfile;
880 cppchar_t c;
881 cpp_buffer *buffer;
882 const unsigned char *comment_start;
883 cpp_token *result = pfile->cur_token++;
885 fresh_line:
886 buffer = pfile->buffer;
887 result->flags = buffer->saved_flags;
888 buffer->saved_flags = 0;
889 update_tokens_line:
890 result->line = pfile->line;
892 skipped_white:
893 c = *buffer->cur++;
894 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
896 trigraph:
897 switch (c)
899 case ' ': case '\t': case '\f': case '\v': case '\0':
900 result->flags |= PREV_WHITE;
901 if (skip_whitespace (pfile, c))
902 goto skipped_white;
904 /* EOF. */
905 buffer->cur--;
906 buffer->saved_flags = BOL;
907 if (!pfile->state.parsing_args && !pfile->state.in_directive)
909 if (buffer->cur != buffer->line_base)
911 /* Non-empty files should end in a newline. Don't warn
912 for command line and _Pragma buffers. */
913 if (!buffer->from_stage3)
914 cpp_pedwarn (pfile, "no newline at end of file");
915 handle_newline (pfile);
918 /* Don't pop the last buffer. */
919 if (buffer->prev)
921 unsigned char stop = buffer->return_at_eof;
923 _cpp_pop_buffer (pfile);
924 if (!stop)
925 goto fresh_line;
928 result->type = CPP_EOF;
929 break;
931 case '\n': case '\r':
932 handle_newline (pfile);
933 buffer->saved_flags = BOL;
934 if (! pfile->state.in_directive)
936 if (pfile->state.parsing_args == 2)
937 buffer->saved_flags |= PREV_WHITE;
938 if (!pfile->keep_tokens)
940 pfile->cur_run = &pfile->base_run;
941 result = pfile->base_run.base;
942 pfile->cur_token = result + 1;
944 goto fresh_line;
946 result->type = CPP_EOF;
947 break;
949 case '?':
950 case '\\':
951 /* These could start an escaped newline, or '?' a trigraph. Let
952 skip_escaped_newlines do all the work. */
954 unsigned int line = pfile->line;
956 c = skip_escaped_newlines (pfile);
957 if (line != pfile->line)
959 buffer->cur--;
960 /* We had at least one escaped newline of some sort.
961 Update the token's line and column. */
962 goto update_tokens_line;
966 /* We are either the original '?' or '\\', or a trigraph. */
967 if (c == '?')
968 result->type = CPP_QUERY;
969 else if (c == '\\')
970 goto random_char;
971 else
972 goto trigraph;
973 break;
975 case '0': case '1': case '2': case '3': case '4':
976 case '5': case '6': case '7': case '8': case '9':
977 result->type = CPP_NUMBER;
978 parse_number (pfile, &result->val.str, c, 0);
979 break;
981 case 'L':
982 /* 'L' may introduce wide characters or strings. */
984 const unsigned char *pos = buffer->cur;
986 c = get_effective_char (pfile);
987 if (c == '\'' || c == '"')
989 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
990 parse_string (pfile, result, c);
991 break;
993 buffer->cur = pos;
995 /* Fall through. */
997 start_ident:
998 case '_':
999 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1000 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1001 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1002 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1003 case 'y': case 'z':
1004 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1005 case 'G': case 'H': case 'I': case 'J': case 'K':
1006 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1007 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1008 case 'Y': case 'Z':
1009 result->type = CPP_NAME;
1010 result->val.node = parse_identifier (pfile);
1012 /* Convert named operators to their proper types. */
1013 if (result->val.node->flags & NODE_OPERATOR)
1015 result->flags |= NAMED_OP;
1016 result->type = result->val.node->value.operator;
1018 break;
1020 case '\'':
1021 case '"':
1022 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1023 parse_string (pfile, result, c);
1024 break;
1026 case '/':
1027 /* A potential block or line comment. */
1028 comment_start = buffer->cur;
1029 c = get_effective_char (pfile);
1031 if (c == '*')
1033 if (skip_block_comment (pfile))
1034 cpp_error (pfile, "unterminated comment");
1036 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1037 || CPP_IN_SYSTEM_HEADER (pfile)))
1039 /* Warn about comments only if pedantically GNUC89, and not
1040 in system headers. */
1041 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1042 && ! buffer->warned_cplusplus_comments)
1044 cpp_pedwarn (pfile,
1045 "C++ style comments are not allowed in ISO C89");
1046 cpp_pedwarn (pfile,
1047 "(this will be reported only once per input file)");
1048 buffer->warned_cplusplus_comments = 1;
1051 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1052 cpp_warning (pfile, "multi-line comment");
1054 else if (c == '=')
1056 result->type = CPP_DIV_EQ;
1057 break;
1059 else
1061 BACKUP ();
1062 result->type = CPP_DIV;
1063 break;
1066 if (!pfile->state.save_comments)
1068 result->flags |= PREV_WHITE;
1069 goto update_tokens_line;
1072 /* Save the comment as a token in its own right. */
1073 save_comment (pfile, result, comment_start);
1074 break;
1076 case '<':
1077 if (pfile->state.angled_headers)
1079 result->type = CPP_HEADER_NAME;
1080 parse_string (pfile, result, '>');
1081 break;
1084 c = get_effective_char (pfile);
1085 if (c == '=')
1086 result->type = CPP_LESS_EQ;
1087 else if (c == '<')
1088 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1089 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1090 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1091 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1093 result->type = CPP_OPEN_SQUARE;
1094 result->flags |= DIGRAPH;
1096 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1098 result->type = CPP_OPEN_BRACE;
1099 result->flags |= DIGRAPH;
1101 else
1103 BACKUP ();
1104 result->type = CPP_LESS;
1106 break;
1108 case '>':
1109 c = get_effective_char (pfile);
1110 if (c == '=')
1111 result->type = CPP_GREATER_EQ;
1112 else if (c == '>')
1113 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1114 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1115 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1116 else
1118 BACKUP ();
1119 result->type = CPP_GREATER;
1121 break;
1123 case '%':
1124 c = get_effective_char (pfile);
1125 if (c == '=')
1126 result->type = CPP_MOD_EQ;
1127 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1129 result->flags |= DIGRAPH;
1130 result->type = CPP_HASH;
1131 if (get_effective_char (pfile) == '%')
1133 const unsigned char *pos = buffer->cur;
1135 if (get_effective_char (pfile) == ':')
1136 result->type = CPP_PASTE;
1137 else
1138 buffer->cur = pos - 1;
1140 else
1141 BACKUP ();
1143 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1145 result->flags |= DIGRAPH;
1146 result->type = CPP_CLOSE_BRACE;
1148 else
1150 BACKUP ();
1151 result->type = CPP_MOD;
1153 break;
1155 case '.':
1156 result->type = CPP_DOT;
1157 c = get_effective_char (pfile);
1158 if (c == '.')
1160 const unsigned char *pos = buffer->cur;
1162 if (get_effective_char (pfile) == '.')
1163 result->type = CPP_ELLIPSIS;
1164 else
1165 buffer->cur = pos - 1;
1167 /* All known character sets have 0...9 contiguous. */
1168 else if (ISDIGIT (c))
1170 result->type = CPP_NUMBER;
1171 parse_number (pfile, &result->val.str, c, 1);
1173 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1174 result->type = CPP_DOT_STAR;
1175 else
1176 BACKUP ();
1177 break;
1179 case '+':
1180 c = get_effective_char (pfile);
1181 if (c == '+')
1182 result->type = CPP_PLUS_PLUS;
1183 else if (c == '=')
1184 result->type = CPP_PLUS_EQ;
1185 else
1187 BACKUP ();
1188 result->type = CPP_PLUS;
1190 break;
1192 case '-':
1193 c = get_effective_char (pfile);
1194 if (c == '>')
1196 result->type = CPP_DEREF;
1197 if (CPP_OPTION (pfile, cplusplus))
1199 if (get_effective_char (pfile) == '*')
1200 result->type = CPP_DEREF_STAR;
1201 else
1202 BACKUP ();
1205 else if (c == '-')
1206 result->type = CPP_MINUS_MINUS;
1207 else if (c == '=')
1208 result->type = CPP_MINUS_EQ;
1209 else
1211 BACKUP ();
1212 result->type = CPP_MINUS;
1214 break;
1216 case '&':
1217 c = get_effective_char (pfile);
1218 if (c == '&')
1219 result->type = CPP_AND_AND;
1220 else if (c == '=')
1221 result->type = CPP_AND_EQ;
1222 else
1224 BACKUP ();
1225 result->type = CPP_AND;
1227 break;
1229 case '|':
1230 c = get_effective_char (pfile);
1231 if (c == '|')
1232 result->type = CPP_OR_OR;
1233 else if (c == '=')
1234 result->type = CPP_OR_EQ;
1235 else
1237 BACKUP ();
1238 result->type = CPP_OR;
1240 break;
1242 case ':':
1243 c = get_effective_char (pfile);
1244 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1245 result->type = CPP_SCOPE;
1246 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1248 result->flags |= DIGRAPH;
1249 result->type = CPP_CLOSE_SQUARE;
1251 else
1253 BACKUP ();
1254 result->type = CPP_COLON;
1256 break;
1258 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1259 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1260 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1261 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1262 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1264 case '~': result->type = CPP_COMPL; break;
1265 case ',': result->type = CPP_COMMA; break;
1266 case '(': result->type = CPP_OPEN_PAREN; break;
1267 case ')': result->type = CPP_CLOSE_PAREN; break;
1268 case '[': result->type = CPP_OPEN_SQUARE; break;
1269 case ']': result->type = CPP_CLOSE_SQUARE; break;
1270 case '{': result->type = CPP_OPEN_BRACE; break;
1271 case '}': result->type = CPP_CLOSE_BRACE; break;
1272 case ';': result->type = CPP_SEMICOLON; break;
1274 /* @ is a punctuator in Objective C. */
1275 case '@': result->type = CPP_ATSIGN; break;
1277 case '$':
1278 if (CPP_OPTION (pfile, dollars_in_ident))
1279 goto start_ident;
1280 /* Fall through... */
1282 random_char:
1283 default:
1284 result->type = CPP_OTHER;
1285 result->val.c = c;
1286 break;
1289 return result;
1292 /* An upper bound on the number of bytes needed to spell a token,
1293 including preceding whitespace. */
1294 unsigned int
1295 cpp_token_len (token)
1296 const cpp_token *token;
1298 unsigned int len;
1300 switch (TOKEN_SPELL (token))
1302 default: len = 0; break;
1303 case SPELL_NUMBER:
1304 case SPELL_STRING: len = token->val.str.len; break;
1305 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1307 /* 1 for whitespace, 4 for comment delimiters. */
1308 return len + 5;
1311 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1312 already contain the enough space to hold the token's spelling.
1313 Returns a pointer to the character after the last character
1314 written. */
1315 unsigned char *
1316 cpp_spell_token (pfile, token, buffer)
1317 cpp_reader *pfile; /* Would be nice to be rid of this... */
1318 const cpp_token *token;
1319 unsigned char *buffer;
1321 switch (TOKEN_SPELL (token))
1323 case SPELL_OPERATOR:
1325 const unsigned char *spelling;
1326 unsigned char c;
1328 if (token->flags & DIGRAPH)
1329 spelling
1330 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1331 else if (token->flags & NAMED_OP)
1332 goto spell_ident;
1333 else
1334 spelling = TOKEN_NAME (token);
1336 while ((c = *spelling++) != '\0')
1337 *buffer++ = c;
1339 break;
1341 case SPELL_CHAR:
1342 *buffer++ = token->val.c;
1343 break;
1345 spell_ident:
1346 case SPELL_IDENT:
1347 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1348 buffer += NODE_LEN (token->val.node);
1349 break;
1351 case SPELL_NUMBER:
1352 memcpy (buffer, token->val.str.text, token->val.str.len);
1353 buffer += token->val.str.len;
1354 break;
1356 case SPELL_STRING:
1358 int left, right, tag;
1359 switch (token->type)
1361 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1362 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1363 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1364 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1365 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1366 default:
1367 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1368 return buffer;
1370 if (tag) *buffer++ = tag;
1371 *buffer++ = left;
1372 memcpy (buffer, token->val.str.text, token->val.str.len);
1373 buffer += token->val.str.len;
1374 *buffer++ = right;
1376 break;
1378 case SPELL_NONE:
1379 cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
1380 break;
1383 return buffer;
1386 /* Returns a token as a null-terminated string. The string is
1387 temporary, and automatically freed later. Useful for diagnostics. */
1388 unsigned char *
1389 cpp_token_as_text (pfile, token)
1390 cpp_reader *pfile;
1391 const cpp_token *token;
1393 unsigned int len = cpp_token_len (token);
1394 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1396 end = cpp_spell_token (pfile, token, start);
1397 end[0] = '\0';
1399 return start;
1402 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1403 const char *
1404 cpp_type2name (type)
1405 enum cpp_ttype type;
1407 return (const char *) token_spellings[type].name;
1410 /* Writes the spelling of token to FP, without any preceding space.
1411 Separated from cpp_spell_token for efficiency - to avoid stdio
1412 double-buffering. */
1413 void
1414 cpp_output_token (token, fp)
1415 const cpp_token *token;
1416 FILE *fp;
1418 switch (TOKEN_SPELL (token))
1420 case SPELL_OPERATOR:
1422 const unsigned char *spelling;
1423 int c;
1425 if (token->flags & DIGRAPH)
1426 spelling
1427 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1428 else if (token->flags & NAMED_OP)
1429 goto spell_ident;
1430 else
1431 spelling = TOKEN_NAME (token);
1433 c = *spelling;
1435 putc (c, fp);
1436 while ((c = *++spelling) != '\0');
1438 break;
1440 case SPELL_CHAR:
1441 putc (token->val.c, fp);
1442 break;
1444 spell_ident:
1445 case SPELL_IDENT:
1446 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1447 break;
1449 case SPELL_NUMBER:
1450 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1451 break;
1453 case SPELL_STRING:
1455 int left, right, tag;
1456 switch (token->type)
1458 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1459 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1460 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1461 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1462 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1463 default:
1464 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1465 return;
1467 if (tag) putc (tag, fp);
1468 putc (left, fp);
1469 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1470 putc (right, fp);
1472 break;
1474 case SPELL_NONE:
1475 /* An error, most probably. */
1476 break;
1480 /* Compare two tokens. */
1482 _cpp_equiv_tokens (a, b)
1483 const cpp_token *a, *b;
1485 if (a->type == b->type && a->flags == b->flags)
1486 switch (TOKEN_SPELL (a))
1488 default: /* Keep compiler happy. */
1489 case SPELL_OPERATOR:
1490 return 1;
1491 case SPELL_CHAR:
1492 return a->val.c == b->val.c; /* Character. */
1493 case SPELL_NONE:
1494 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1495 case SPELL_IDENT:
1496 return a->val.node == b->val.node;
1497 case SPELL_NUMBER:
1498 case SPELL_STRING:
1499 return (a->val.str.len == b->val.str.len
1500 && !memcmp (a->val.str.text, b->val.str.text,
1501 a->val.str.len));
1504 return 0;
1507 /* Returns nonzero if a space should be inserted to avoid an
1508 accidental token paste for output. For simplicity, it is
1509 conservative, and occasionally advises a space where one is not
1510 needed, e.g. "." and ".2". */
1513 cpp_avoid_paste (pfile, token1, token2)
1514 cpp_reader *pfile;
1515 const cpp_token *token1, *token2;
1517 enum cpp_ttype a = token1->type, b = token2->type;
1518 cppchar_t c;
1520 if (token1->flags & NAMED_OP)
1521 a = CPP_NAME;
1522 if (token2->flags & NAMED_OP)
1523 b = CPP_NAME;
1525 c = EOF;
1526 if (token2->flags & DIGRAPH)
1527 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1528 else if (token_spellings[b].category == SPELL_OPERATOR)
1529 c = token_spellings[b].name[0];
1531 /* Quickly get everything that can paste with an '='. */
1532 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1533 return 1;
1535 switch (a)
1537 case CPP_GREATER: return c == '>' || c == '?';
1538 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1539 case CPP_PLUS: return c == '+';
1540 case CPP_MINUS: return c == '-' || c == '>';
1541 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1542 case CPP_MOD: return c == ':' || c == '>';
1543 case CPP_AND: return c == '&';
1544 case CPP_OR: return c == '|';
1545 case CPP_COLON: return c == ':' || c == '>';
1546 case CPP_DEREF: return c == '*';
1547 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1548 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1549 case CPP_NAME: return ((b == CPP_NUMBER
1550 && name_p (pfile, &token2->val.str))
1551 || b == CPP_NAME
1552 || b == CPP_CHAR || b == CPP_STRING); /* L */
1553 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1554 || c == '.' || c == '+' || c == '-');
1555 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1556 && token1->val.c == '@'
1557 && (b == CPP_NAME || b == CPP_STRING));
1558 default: break;
1561 return 0;
1564 /* Output all the remaining tokens on the current line, and a newline
1565 character, to FP. Leading whitespace is removed. If there are
1566 macros, special token padding is not performed. */
1567 void
1568 cpp_output_line (pfile, fp)
1569 cpp_reader *pfile;
1570 FILE *fp;
1572 const cpp_token *token;
1574 token = cpp_get_token (pfile);
1575 while (token->type != CPP_EOF)
1577 cpp_output_token (token, fp);
1578 token = cpp_get_token (pfile);
1579 if (token->flags & PREV_WHITE)
1580 putc (' ', fp);
1583 putc ('\n', fp);
/* Return the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1597 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1598 failure if cpplib is not parsing C++ or C99. Such failure is
1599 silent, and no variables are updated. Otherwise returns 0, and
1600 warns if -Wtraditional.
1602 [lex.charset]: The character designated by the universal character
1603 name \UNNNNNNNN is that character whose character short name in
1604 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1605 universal character name \uNNNN is that character whose character
1606 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1607 for a universal character name is less than 0x20 or in the range
1608 0x7F-0x9F (inclusive), or if the universal character name
1609 designates a character in the basic source character set, then the
1610 program is ill-formed.
1612 We assume that wchar_t is Unicode, so we don't need to do any
1613 mapping. Is this ever wrong?
1615 PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1616 LIMIT is the end of the string or charconst. PSTR is updated to
1617 point after the UCS on return, and the UCS is written into PC. */
1619 static int
1620 maybe_read_ucs (pfile, pstr, limit, pc)
1621 cpp_reader *pfile;
1622 const unsigned char **pstr;
1623 const unsigned char *limit;
1624 unsigned int *pc;
1626 const unsigned char *p = *pstr;
1627 unsigned int code = 0;
1628 unsigned int c = *pc, length;
1630 /* Only attempt to interpret a UCS for C++ and C99. */
1631 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1632 return 1;
1634 if (CPP_WTRADITIONAL (pfile))
1635 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1637 length = (c == 'u' ? 4: 8);
1639 if ((size_t) (limit - p) < length)
1641 cpp_error (pfile, "incomplete universal-character-name");
1642 /* Skip to the end to avoid more diagnostics. */
1643 p = limit;
1645 else
1647 for (; length; length--, p++)
1649 c = *p;
1650 if (ISXDIGIT (c))
1651 code = (code << 4) + hex_digit_value (c);
1652 else
1654 cpp_error (pfile,
1655 "non-hex digit '%c' in universal-character-name", c);
1656 /* We shouldn't skip in case there are multibyte chars. */
1657 break;
1662 #ifdef TARGET_EBCDIC
1663 cpp_error (pfile, "universal-character-name on EBCDIC target");
1664 code = 0x3f; /* EBCDIC invalid character */
1665 #else
1666 /* True extended characters are OK. */
1667 if (code >= 0xa0
1668 && !(code & 0x80000000)
1669 && !(code >= 0xD800 && code <= 0xDFFF))
1671 /* The standard permits $, @ and ` to be specified as UCNs. We use
1672 hex escapes so that this also works with EBCDIC hosts. */
1673 else if (code == 0x24 || code == 0x40 || code == 0x60)
1675 /* Don't give another error if one occurred above. */
1676 else if (length == 0)
1677 cpp_error (pfile, "universal-character-name out of range");
1678 #endif
1680 *pstr = p;
1681 *pc = code;
1682 return 0;
1685 /* Interpret an escape sequence, and return its value. PSTR points to
1686 the input pointer, which is just after the backslash. LIMIT is how
1687 much text we have. MASK is a bitmask for the precision for the
1688 destination type (char or wchar_t). TRADITIONAL, if true, does not
1689 interpret escapes that did not exist in traditional C.
1691 Handles all relevant diagnostics. */
1693 unsigned int
1694 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1695 cpp_reader *pfile;
1696 const unsigned char **pstr;
1697 const unsigned char *limit;
1698 unsigned HOST_WIDE_INT mask;
1699 int traditional;
1701 int unknown = 0;
1702 const unsigned char *str = *pstr;
1703 unsigned int c = *str++;
1705 switch (c)
1707 case '\\': case '\'': case '"': case '?': break;
1708 case 'b': c = TARGET_BS; break;
1709 case 'f': c = TARGET_FF; break;
1710 case 'n': c = TARGET_NEWLINE; break;
1711 case 'r': c = TARGET_CR; break;
1712 case 't': c = TARGET_TAB; break;
1713 case 'v': c = TARGET_VT; break;
1715 case '(': case '{': case '[': case '%':
1716 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1717 '\%' is used to prevent SCCS from getting confused. */
1718 unknown = CPP_PEDANTIC (pfile);
1719 break;
1721 case 'a':
1722 if (CPP_WTRADITIONAL (pfile))
1723 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1724 if (!traditional)
1725 c = TARGET_BELL;
1726 break;
1728 case 'e': case 'E':
1729 if (CPP_PEDANTIC (pfile))
1730 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1731 c = TARGET_ESC;
1732 break;
1734 case 'u': case 'U':
1735 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1736 break;
1738 case 'x':
1739 if (CPP_WTRADITIONAL (pfile))
1740 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1742 if (!traditional)
1744 unsigned int i = 0, overflow = 0;
1745 int digits_found = 0;
1747 while (str < limit)
1749 c = *str;
1750 if (! ISXDIGIT (c))
1751 break;
1752 str++;
1753 overflow |= i ^ (i << 4 >> 4);
1754 i = (i << 4) + hex_digit_value (c);
1755 digits_found = 1;
1758 if (!digits_found)
1759 cpp_error (pfile, "\\x used with no following hex digits");
1761 if (overflow | (i != (i & mask)))
1763 cpp_pedwarn (pfile, "hex escape sequence out of range");
1764 i &= mask;
1766 c = i;
1768 break;
1770 case '0': case '1': case '2': case '3':
1771 case '4': case '5': case '6': case '7':
1773 unsigned int i = c - '0';
1774 int count = 0;
1776 while (str < limit && ++count < 3)
1778 c = *str;
1779 if (c < '0' || c > '7')
1780 break;
1781 str++;
1782 i = (i << 3) + c - '0';
1785 if (i != (i & mask))
1787 cpp_pedwarn (pfile, "octal escape sequence out of range");
1788 i &= mask;
1790 c = i;
1792 break;
1794 default:
1795 unknown = 1;
1796 break;
1799 if (unknown)
1801 if (ISGRAPH (c))
1802 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1803 else
1804 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1807 if (c > mask)
1808 cpp_pedwarn (pfile, "escape sequence out of range for character");
1810 *pstr = str;
1811 return c;
1814 #ifndef MAX_CHAR_TYPE_SIZE
1815 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1816 #endif
1818 #ifndef MAX_WCHAR_TYPE_SIZE
1819 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1820 #endif
1822 /* Interpret a (possibly wide) character constant in TOKEN.
1823 WARN_MULTI warns about multi-character charconsts, if not
1824 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1825 that did not exist in traditional C. PCHARS_SEEN points to a
1826 variable that is filled in with the number of characters seen. */
1827 HOST_WIDE_INT
1828 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1829 cpp_reader *pfile;
1830 const cpp_token *token;
1831 int warn_multi;
1832 int traditional;
1833 unsigned int *pchars_seen;
1835 const unsigned char *str = token->val.str.text;
1836 const unsigned char *limit = str + token->val.str.len;
1837 unsigned int chars_seen = 0;
1838 unsigned int width, max_chars, c;
1839 unsigned HOST_WIDE_INT mask;
1840 HOST_WIDE_INT result = 0;
1842 #ifdef MULTIBYTE_CHARS
1843 (void) local_mbtowc (NULL, NULL, 0);
1844 #endif
1846 /* Width in bits. */
1847 if (token->type == CPP_CHAR)
1848 width = MAX_CHAR_TYPE_SIZE;
1849 else
1850 width = MAX_WCHAR_TYPE_SIZE;
1852 if (width < HOST_BITS_PER_WIDE_INT)
1853 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1854 else
1855 mask = ~0;
1856 max_chars = HOST_BITS_PER_WIDE_INT / width;
1858 while (str < limit)
1860 #ifdef MULTIBYTE_CHARS
1861 wchar_t wc;
1862 int char_len;
1864 char_len = local_mbtowc (&wc, str, limit - str);
1865 if (char_len == -1)
1867 cpp_warning (pfile, "ignoring invalid multibyte character");
1868 c = *str++;
1870 else
1872 str += char_len;
1873 c = wc;
1875 #else
1876 c = *str++;
1877 #endif
1879 if (c == '\\')
1880 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1882 #ifdef MAP_CHARACTER
1883 if (ISPRINT (c))
1884 c = MAP_CHARACTER (c);
1885 #endif
1887 /* Merge character into result; ignore excess chars. */
1888 if (++chars_seen <= max_chars)
1890 if (width < HOST_BITS_PER_WIDE_INT)
1891 result = (result << width) | (c & mask);
1892 else
1893 result = c;
1897 if (chars_seen == 0)
1898 cpp_error (pfile, "empty character constant");
1899 else if (chars_seen > max_chars)
1901 chars_seen = max_chars;
1902 cpp_warning (pfile, "character constant too long");
1904 else if (chars_seen > 1 && !traditional && warn_multi)
1905 cpp_warning (pfile, "multi-character character constant");
1907 /* If char type is signed, sign-extend the constant. The
1908 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1909 if (token->type == CPP_CHAR && chars_seen)
1911 unsigned int nbits = chars_seen * width;
1913 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
1914 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1915 || ((result >> (nbits - 1)) & 1) == 0)
1916 result &= mask;
1917 else
1918 result |= ~mask;
1921 *pchars_seen = chars_seen;
1922 return result;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the span from BUFF's cur
   pointer to its limit.  Every macro argument is parenthesized so
   that arbitrary expressions can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif

/* Used only to measure alignment: the offset of U within this
   structure is the alignment the compiler gives a union of a double
   and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit masking is only
   correct when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1953 /* Create a new allocation buffer. Place the control block at the end
1954 of the buffer, so that buffer overflows will cause immediate chaos. */
1955 static _cpp_buff *
1956 new_buff (len)
1957 size_t len;
1959 _cpp_buff *result;
1960 unsigned char *base;
1962 if (len < MIN_BUFF_SIZE)
1963 len = MIN_BUFF_SIZE;
1964 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1966 base = xmalloc (len + sizeof (_cpp_buff));
1967 result = (_cpp_buff *) (base + len);
1968 result->base = base;
1969 result->cur = base;
1970 result->limit = base + len;
1971 result->next = NULL;
1972 return result;
1975 /* Place a chain of unwanted allocation buffers on the free list. */
1976 void
1977 _cpp_release_buff (pfile, buff)
1978 cpp_reader *pfile;
1979 _cpp_buff *buff;
1981 _cpp_buff *end = buff;
1983 while (end->next)
1984 end = end->next;
1985 end->next = pfile->free_buffs;
1986 pfile->free_buffs = buff;
1989 /* Return a free buffer of size at least MIN_SIZE. */
1990 _cpp_buff *
1991 _cpp_get_buff (pfile, min_size)
1992 cpp_reader *pfile;
1993 size_t min_size;
1995 _cpp_buff *result, **p;
1997 for (p = &pfile->free_buffs;; p = &(*p)->next)
1999 size_t size;
2001 if (*p == NULL)
2002 return new_buff (min_size);
2003 result = *p;
2004 size = result->limit - result->base;
2005 /* Return a buffer that's big enough, but don't waste one that's
2006 way too big. */
2007 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2008 break;
2011 *p = result->next;
2012 result->next = NULL;
2013 result->cur = result->base;
2014 return result;
2017 /* Creates a new buffer with enough space to hold the uncommitted
2018 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2019 the excess bytes to the new buffer. Chains the new buffer after
2020 BUFF, and returns the new buffer. */
2021 _cpp_buff *
2022 _cpp_append_extend_buff (pfile, buff, min_extra)
2023 cpp_reader *pfile;
2024 _cpp_buff *buff;
2025 size_t min_extra;
2027 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2028 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2030 buff->next = new_buff;
2031 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2032 return new_buff;
2035 /* Creates a new buffer with enough space to hold the uncommitted
2036 remaining bytes of the buffer pointed to by BUFF, and at least
2037 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2038 Chains the new buffer before the buffer pointed to by BUFF, and
2039 updates the pointer to point to the new buffer. */
2040 void
2041 _cpp_extend_buff (pfile, pbuff, min_extra)
2042 cpp_reader *pfile;
2043 _cpp_buff **pbuff;
2044 size_t min_extra;
2046 _cpp_buff *new_buff, *old_buff = *pbuff;
2047 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2049 new_buff = _cpp_get_buff (pfile, size);
2050 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2051 new_buff->next = old_buff;
2052 *pbuff = new_buff;
2055 /* Free a chain of buffers starting at BUFF. */
2056 void
2057 _cpp_free_buff (buff)
2058 _cpp_buff *buff;
2060 _cpp_buff *next;
2062 for (; buff; buff = next)
2064 next = buff->next;
2065 free (buff->base);
2069 /* Allocate permanent, unaligned storage of length LEN. */
2070 unsigned char *
2071 _cpp_unaligned_alloc (pfile, len)
2072 cpp_reader *pfile;
2073 size_t len;
2075 _cpp_buff *buff = pfile->u_buff;
2076 unsigned char *result = buff->cur;
2078 if (len > (size_t) (buff->limit - result))
2080 buff = _cpp_get_buff (pfile, len);
2081 buff->next = pfile->u_buff;
2082 pfile->u_buff = buff;
2083 result = buff->cur;
2086 buff->cur = result + len;
2087 return result;
2090 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2091 That buffer is used for growing allocations when saving macro
2092 replacement lists in a #define, and when parsing an answer to an
2093 assertion in #assert, #unassert or #if (and therefore possibly
2094 whilst expanding macros). It therefore must not be used by any
2095 code that they might call: specifically the lexer and the guts of
2096 the macro expander.
2098 All existing other uses clearly fit this restriction: storing
2099 registered pragmas during initialization. */
2100 unsigned char *
2101 _cpp_aligned_alloc (pfile, len)
2102 cpp_reader *pfile;
2103 size_t len;
2105 _cpp_buff *buff = pfile->a_buff;
2106 unsigned char *result = buff->cur;
2108 if (len > (size_t) (buff->limit - result))
2110 buff = _cpp_get_buff (pfile, len);
2111 buff->next = pfile->a_buff;
2112 pfile->a_buff = buff;
2113 result = buff->cur;
2116 buff->cur = result + len;
2117 return result;