* Makefile.in: Rebuilt.
[official-gcc.git] / gcc / cpplex.c
blob30f739da66ac41c590c04a3b85cadc7c5a16ebd9
1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
12 later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
37 #include "config.h"
38 #include "system.h"
39 #include "cpplib.h"
40 #include "cpphash.h"
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
45 #ifdef CROSS_COMPILE
46 #undef MULTIBYTE_CHARS
47 #endif
49 #ifdef MULTIBYTE_CHARS
50 #include "mbchar.h"
51 #include <locale.h>
52 #endif
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and it's length in the token->val.name.len. */
56 enum spell_type
58 SPELL_OPERATOR = 0,
59 SPELL_CHAR,
60 SPELL_IDENT,
61 SPELL_STRING,
62 SPELL_NONE
65 struct token_spelling
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
72 U":>", U"<%", U"%>"};
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
77 #undef OP
78 #undef TK
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, HOST_WIDE_INT,
103 int));
104 static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
105 const unsigned char *, unsigned int));
107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
109 static unsigned int hex_digit_value PARAMS ((unsigned int));
111 /* Utility routine:
113 Compares, the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
117 cpp_ideq (token, string)
118 const cpp_token *token;
119 const char *string;
121 if (token->type != CPP_NAME)
122 return 0;
124 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
127 /* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
129 static cppchar_t
130 handle_newline (buffer, newline_char)
131 cpp_buffer *buffer;
132 cppchar_t newline_char;
134 cppchar_t next = EOF;
136 buffer->col_adjust = 0;
137 buffer->lineno++;
138 buffer->line_base = buffer->cur;
140 /* Handle CR-LF and LF-CR combinations, get the next character. */
141 if (buffer->cur < buffer->rlimit)
143 next = *buffer->cur++;
144 if (next + newline_char == '\r' + '\n')
146 buffer->line_base = buffer->cur;
147 if (buffer->cur < buffer->rlimit)
148 next = *buffer->cur++;
149 else
150 next = EOF;
154 buffer->read_ahead = next;
155 return next;
158 /* Subroutine of skip_escaped_newlines; called when a trigraph is
159 encountered. It warns if necessary, and returns true if the
160 trigraph should be honoured. FROM_CHAR is the third character of a
161 trigraph, and presumed to be the previous character for position
162 reporting. */
163 static int
164 trigraph_ok (pfile, from_char)
165 cpp_reader *pfile;
166 cppchar_t from_char;
168 int accept = CPP_OPTION (pfile, trigraphs);
170 /* Don't warn about trigraphs in comments. */
171 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
173 cpp_buffer *buffer = pfile->buffer;
174 if (accept)
175 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
176 "trigraph ??%c converted to %c",
177 (int) from_char,
178 (int) _cpp_trigraph_map[from_char]);
179 else if (buffer->cur != buffer->last_Wtrigraphs)
181 buffer->last_Wtrigraphs = buffer->cur;
182 cpp_warning_with_line (pfile, buffer->lineno,
183 CPP_BUF_COL (buffer) - 2,
184 "trigraph ??%c ignored", (int) from_char);
188 return accept;
/* Sets the token type to T and consumes the read-ahead character.
   Assumes local variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = (t); buffer->read_ahead = EOF; } while (0)

/* When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.
   Both assume local variables buffer and saved_cur.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
207 static cppchar_t
208 skip_escaped_newlines (buffer, next)
209 cpp_buffer *buffer;
210 cppchar_t next;
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer->from_stage3)
215 cppchar_t next1;
216 const unsigned char *saved_cur;
217 int space;
221 if (buffer->cur == buffer->rlimit)
222 break;
224 SAVE_STATE ();
225 if (next == '?')
227 next1 = *buffer->cur++;
228 if (next1 != '?' || buffer->cur == buffer->rlimit)
230 RESTORE_STATE ();
231 break;
234 next1 = *buffer->cur++;
235 if (!_cpp_trigraph_map[next1]
236 || !trigraph_ok (buffer->pfile, next1))
238 RESTORE_STATE ();
239 break;
242 /* We have a full trigraph here. */
243 next = _cpp_trigraph_map[next1];
244 if (next != '\\' || buffer->cur == buffer->rlimit)
245 break;
246 SAVE_STATE ();
249 /* We have a backslash, and room for at least one more character. */
250 space = 0;
253 next1 = *buffer->cur++;
254 if (!is_nvspace (next1))
255 break;
256 space = 1;
258 while (buffer->cur < buffer->rlimit);
260 if (!is_vspace (next1))
262 RESTORE_STATE ();
263 break;
266 if (space && !buffer->pfile->state.lexing_comment)
267 cpp_warning (buffer->pfile,
268 "backslash and newline separated by space");
270 next = handle_newline (buffer, next1);
271 if (next == EOF)
272 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
274 while (next == '\\' || next == '?');
277 buffer->read_ahead = next;
278 return next;
281 /* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
284 static cppchar_t
285 get_effective_char (buffer)
286 cpp_buffer *buffer;
288 cppchar_t next = EOF;
290 if (buffer->cur < buffer->rlimit)
292 next = *buffer->cur++;
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in
297 the future. */
298 if (next == '?' || next == '\\')
299 next = skip_escaped_newlines (buffer, next);
302 buffer->read_ahead = next;
303 return next;
306 /* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
309 static int
310 skip_block_comment (pfile)
311 cpp_reader *pfile;
313 cpp_buffer *buffer = pfile->buffer;
314 cppchar_t c = EOF, prevc = EOF;
316 pfile->state.lexing_comment = 1;
317 while (buffer->cur != buffer->rlimit)
319 prevc = c, c = *buffer->cur++;
321 next_char:
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c == '?' || c == '\\')
325 c = skip_escaped_newlines (buffer, c);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
329 if (c == '/')
331 if (prevc == '*')
332 break;
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimeter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile, warn_comments)
338 && buffer->cur != buffer->rlimit)
340 prevc = c, c = *buffer->cur++;
341 if (c == '*' && buffer->cur != buffer->rlimit)
343 prevc = c, c = *buffer->cur++;
344 if (c != '/')
345 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
346 CPP_BUF_COL (buffer),
347 "\"/*\" within comment");
349 goto next_char;
352 else if (is_vspace (c))
354 prevc = c, c = handle_newline (buffer, c);
355 goto next_char;
357 else if (c == '\t')
358 adjust_column (pfile);
361 pfile->state.lexing_comment = 0;
362 buffer->read_ahead = EOF;
363 return c != '/' || prevc != '*';
366 /* Skip a C++ line comment. Handles escaped newlines. Returns
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
369 static int
370 skip_line_comment (pfile)
371 cpp_reader *pfile;
373 cpp_buffer *buffer = pfile->buffer;
374 unsigned int orig_lineno = buffer->lineno;
375 cppchar_t c;
377 pfile->state.lexing_comment = 1;
380 c = EOF;
381 if (buffer->cur == buffer->rlimit)
382 break;
384 c = *buffer->cur++;
385 if (c == '?' || c == '\\')
386 c = skip_escaped_newlines (buffer, c);
388 while (!is_vspace (c));
390 pfile->state.lexing_comment = 0;
391 buffer->read_ahead = c; /* Leave any newline for caller. */
392 return orig_lineno != buffer->lineno;
395 /* pfile->buffer->cur is one beyond the \t character. Update
396 col_adjust so we track the column correctly. */
397 static void
398 adjust_column (pfile)
399 cpp_reader *pfile;
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
404 /* Round it up to multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
406 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
407 - col % CPP_OPTION (pfile, tabstop)) - 1;
410 /* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
413 static void
414 skip_whitespace (pfile, c)
415 cpp_reader *pfile;
416 cppchar_t c;
418 cpp_buffer *buffer = pfile->buffer;
419 unsigned int warned = 0;
423 /* Horizontal space always OK. */
424 if (c == ' ')
426 else if (c == '\t')
427 adjust_column (pfile);
428 /* Just \f \v or \0 left. */
429 else if (c == '\0')
431 if (!warned)
433 cpp_warning (pfile, "null character(s) ignored");
434 warned = 1;
437 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
438 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
439 CPP_BUF_COL (buffer),
440 "%s in preprocessing directive",
441 c == '\f' ? "form feed" : "vertical tab");
443 c = EOF;
444 if (buffer->cur == buffer->rlimit)
445 break;
446 c = *buffer->cur++;
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c));
451 /* Remember the next character. */
452 buffer->read_ahead = c;
455 /* See if the characters of a number token are valid in a name (no
456 '.', '+' or '-'). */
457 static int
458 name_p (pfile, string)
459 cpp_reader *pfile;
460 const cpp_string *string;
462 unsigned int i;
464 for (i = 0; i < string->len; i++)
465 if (!is_idchar (string->text[i]))
466 return 0;
468 return 1;
471 /* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
475 static cpp_hashnode *
476 parse_identifier (pfile, c)
477 cpp_reader *pfile;
478 cppchar_t c;
480 cpp_hashnode *result;
481 cpp_buffer *buffer = pfile->buffer;
482 unsigned char *dest, *limit;
483 unsigned int r = 0, saw_dollar = 0;
485 dest = POOL_FRONT (&pfile->ident_pool);
486 limit = POOL_LIMIT (&pfile->ident_pool);
492 /* Need room for terminating null. */
493 if (dest + 1 >= limit)
494 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
496 *dest++ = c;
497 r = HASHSTEP (r, c);
499 if (c == '$')
500 saw_dollar++;
502 c = EOF;
503 if (buffer->cur == buffer->rlimit)
504 break;
506 c = *buffer->cur++;
508 while (is_idchar (c));
510 /* Potential escaped newline? */
511 if (c != '?' && c != '\\')
512 break;
513 c = skip_escaped_newlines (buffer, c);
515 while (is_idchar (c));
517 /* Remember the next character. */
518 buffer->read_ahead = c;
520 /* $ is not a identifier character in the standard, but is commonly
521 accepted as an extension. Don't warn about it in skipped
522 conditional blocks. */
523 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
524 cpp_pedwarn (pfile, "'$' character(s) in identifier");
526 /* Identifiers are null-terminated. */
527 *dest = '\0';
529 /* This routine commits the memory if necessary. */
530 result = _cpp_lookup_with_hash (pfile,
531 dest - POOL_FRONT (&pfile->ident_pool), r);
533 /* Some identifiers require diagnostics when lexed. */
534 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
536 /* It is allowed to poison the same identifier twice. */
537 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
538 cpp_error (pfile, "attempt to use poisoned \"%s\"",
539 NODE_NAME (result));
541 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
542 replacement list of a variadic macro. */
543 if (result == pfile->spec_nodes.n__VA_ARGS__
544 && !pfile->state.va_args_ok)
545 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
548 return result;
551 /* Parse a number, skipping embedded backslash-newlines. */
552 static void
553 parse_number (pfile, number, c, leading_period)
554 cpp_reader *pfile;
555 cpp_string *number;
556 cppchar_t c;
557 int leading_period;
559 cpp_buffer *buffer = pfile->buffer;
560 cpp_pool *pool = &pfile->ident_pool;
561 unsigned char *dest, *limit;
563 dest = POOL_FRONT (pool);
564 limit = POOL_LIMIT (pool);
566 /* Place a leading period. */
567 if (leading_period)
569 if (dest >= limit)
570 limit = _cpp_next_chunk (pool, 0, &dest);
571 *dest++ = '.';
578 /* Need room for terminating null. */
579 if (dest + 1 >= limit)
580 limit = _cpp_next_chunk (pool, 0, &dest);
581 *dest++ = c;
583 c = EOF;
584 if (buffer->cur == buffer->rlimit)
585 break;
587 c = *buffer->cur++;
589 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
591 /* Potential escaped newline? */
592 if (c != '?' && c != '\\')
593 break;
594 c = skip_escaped_newlines (buffer, c);
596 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
598 /* Remember the next character. */
599 buffer->read_ahead = c;
601 /* Null-terminate the number. */
602 *dest = '\0';
604 number->text = POOL_FRONT (pool);
605 number->len = dest - number->text;
606 POOL_COMMIT (pool, number->len + 1);
609 /* Subroutine of parse_string. Emits error for unterminated strings. */
610 static void
611 unterminated (pfile, term)
612 cpp_reader *pfile;
613 int term;
615 cpp_error (pfile, "missing terminating %c character", term);
617 if (term == '\"' && pfile->mlstring_pos.line
618 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
620 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
621 pfile->mlstring_pos.col,
622 "possible start of unterminated string literal");
623 pfile->mlstring_pos.line = 0;
627 /* Subroutine of parse_string. */
628 static int
629 unescaped_terminator_p (pfile, dest)
630 cpp_reader *pfile;
631 const unsigned char *dest;
633 const unsigned char *start, *temp;
635 /* In #include-style directives, terminators are not escapeable. */
636 if (pfile->state.angled_headers)
637 return 1;
639 start = POOL_FRONT (&pfile->ident_pool);
641 /* An odd number of consecutive backslashes represents an escaped
642 terminator. */
643 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
646 return ((dest - temp) & 1) == 0;
649 /* Parses a string, character constant, or angle-bracketed header file
650 name. Handles embedded trigraphs and escaped newlines. The stored
651 string is guaranteed NUL-terminated, but it is not guaranteed that
652 this is the first NUL since embedded NULs are preserved.
654 Multi-line strings are allowed, but they are deprecated. */
655 static void
656 parse_string (pfile, token, terminator)
657 cpp_reader *pfile;
658 cpp_token *token;
659 cppchar_t terminator;
661 cpp_buffer *buffer = pfile->buffer;
662 cpp_pool *pool = &pfile->ident_pool;
663 unsigned char *dest, *limit;
664 cppchar_t c;
665 unsigned int nulls = 0;
667 dest = POOL_FRONT (pool);
668 limit = POOL_LIMIT (pool);
670 for (;;)
672 if (buffer->cur == buffer->rlimit)
673 c = EOF;
674 else
675 c = *buffer->cur++;
677 have_char:
678 /* We need space for the terminating NUL. */
679 if (dest >= limit)
680 limit = _cpp_next_chunk (pool, 0, &dest);
682 if (c == EOF)
684 unterminated (pfile, terminator);
685 break;
688 /* Handle trigraphs, escaped newlines etc. */
689 if (c == '?' || c == '\\')
690 c = skip_escaped_newlines (buffer, c);
692 if (c == terminator && unescaped_terminator_p (pfile, dest))
694 c = EOF;
695 break;
697 else if (is_vspace (c))
699 /* In assembly language, silently terminate string and
700 character literals at end of line. This is a kludge
701 around not knowing where comments are. */
702 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
703 break;
705 /* Character constants and header names may not extend over
706 multiple lines. In Standard C, neither may strings.
707 Unfortunately, we accept multiline strings as an
708 extension, except in #include family directives. */
709 if (terminator != '"' || pfile->state.angled_headers)
711 unterminated (pfile, terminator);
712 break;
715 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
716 if (pfile->mlstring_pos.line == 0)
717 pfile->mlstring_pos = pfile->lexer_pos;
719 c = handle_newline (buffer, c);
720 *dest++ = '\n';
721 goto have_char;
723 else if (c == '\0')
725 if (nulls++ == 0)
726 cpp_warning (pfile, "null character(s) preserved in literal");
729 *dest++ = c;
732 /* Remember the next character. */
733 buffer->read_ahead = c;
734 *dest = '\0';
736 token->val.str.text = POOL_FRONT (pool);
737 token->val.str.len = dest - token->val.str.text;
738 POOL_COMMIT (pool, token->val.str.len + 1);
741 /* The stored comment includes the comment start and any terminator. */
742 static void
743 save_comment (pfile, token, from)
744 cpp_reader *pfile;
745 cpp_token *token;
746 const unsigned char *from;
748 unsigned char *buffer;
749 unsigned int len;
751 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
752 /* C++ comments probably (not definitely) have moved past a new
753 line, which we don't want to save in the comment. */
754 if (pfile->buffer->read_ahead != EOF)
755 len--;
756 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
758 token->type = CPP_COMMENT;
759 token->val.str.len = len;
760 token->val.str.text = buffer;
762 buffer[0] = '/';
763 memcpy (buffer + 1, from, len - 1);
766 /* Subroutine of lex_token to handle '%'. A little tricky, since we
767 want to avoid stepping back when lexing %:%X. */
768 static void
769 lex_percent (buffer, result)
770 cpp_buffer *buffer;
771 cpp_token *result;
773 cppchar_t c;
775 result->type = CPP_MOD;
776 /* Parsing %:%X could leave an extra character. */
777 if (buffer->extra_char == EOF)
778 c = get_effective_char (buffer);
779 else
781 c = buffer->read_ahead = buffer->extra_char;
782 buffer->extra_char = EOF;
785 if (c == '=')
786 ACCEPT_CHAR (CPP_MOD_EQ);
787 else if (CPP_OPTION (buffer->pfile, digraphs))
789 if (c == ':')
791 result->flags |= DIGRAPH;
792 ACCEPT_CHAR (CPP_HASH);
793 if (get_effective_char (buffer) == '%')
795 buffer->extra_char = get_effective_char (buffer);
796 if (buffer->extra_char == ':')
798 buffer->extra_char = EOF;
799 ACCEPT_CHAR (CPP_PASTE);
801 else
802 /* We'll catch the extra_char when we're called back. */
803 buffer->read_ahead = '%';
806 else if (c == '>')
808 result->flags |= DIGRAPH;
809 ACCEPT_CHAR (CPP_CLOSE_BRACE);
814 /* Subroutine of lex_token to handle '.'. This is tricky, since we
815 want to avoid stepping back when lexing '...' or '.123'. In the
816 latter case we should also set a flag for parse_number. */
817 static void
818 lex_dot (pfile, result)
819 cpp_reader *pfile;
820 cpp_token *result;
822 cpp_buffer *buffer = pfile->buffer;
823 cppchar_t c;
825 /* Parsing ..X could leave an extra character. */
826 if (buffer->extra_char == EOF)
827 c = get_effective_char (buffer);
828 else
830 c = buffer->read_ahead = buffer->extra_char;
831 buffer->extra_char = EOF;
834 /* All known character sets have 0...9 contiguous. */
835 if (c >= '0' && c <= '9')
837 result->type = CPP_NUMBER;
838 parse_number (pfile, &result->val.str, c, 1);
840 else
842 result->type = CPP_DOT;
843 if (c == '.')
845 buffer->extra_char = get_effective_char (buffer);
846 if (buffer->extra_char == '.')
848 buffer->extra_char = EOF;
849 ACCEPT_CHAR (CPP_ELLIPSIS);
851 else
852 /* We'll catch the extra_char when we're called back. */
853 buffer->read_ahead = '.';
855 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
856 ACCEPT_CHAR (CPP_DOT_STAR);
860 void
861 _cpp_lex_token (pfile, result)
862 cpp_reader *pfile;
863 cpp_token *result;
865 cppchar_t c;
866 cpp_buffer *buffer;
867 const unsigned char *comment_start;
868 unsigned char bol;
870 skip:
871 bol = pfile->state.next_bol;
872 done_directive:
873 buffer = pfile->buffer;
874 pfile->state.next_bol = 0;
875 result->flags = buffer->saved_flags;
876 buffer->saved_flags = 0;
877 next_char:
878 pfile->lexer_pos.line = buffer->lineno;
879 next_char2:
880 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
882 c = buffer->read_ahead;
883 if (c == EOF && buffer->cur < buffer->rlimit)
885 c = *buffer->cur++;
886 pfile->lexer_pos.col++;
889 do_switch:
890 buffer->read_ahead = EOF;
891 switch (c)
893 case EOF:
894 /* Non-empty files should end in a newline. Checking "bol" too
895 prevents multiple warnings when hitting the EOF more than
896 once, like in a directive. Don't warn for command line and
897 _Pragma buffers. */
898 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
899 cpp_pedwarn (pfile, "no newline at end of file");
900 pfile->state.next_bol = 1;
901 pfile->skipping = 0; /* In case missing #endif. */
902 result->type = CPP_EOF;
903 /* Don't do MI optimisation. */
904 return;
906 case ' ': case '\t': case '\f': case '\v': case '\0':
907 skip_whitespace (pfile, c);
908 result->flags |= PREV_WHITE;
909 goto next_char2;
911 case '\n': case '\r':
912 if (!pfile->state.in_directive)
914 handle_newline (buffer, c);
915 bol = 1;
916 pfile->lexer_pos.output_line = buffer->lineno;
917 /* This is a new line, so clear any white space flag.
918 Newlines in arguments are white space (6.10.3.10);
919 parse_arg takes care of that. */
920 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
921 goto next_char;
924 /* Don't let directives spill over to the next line. */
925 buffer->read_ahead = c;
926 pfile->state.next_bol = 1;
927 result->type = CPP_EOF;
928 /* Don't break; pfile->skipping might be true. */
929 return;
931 case '?':
932 case '\\':
933 /* These could start an escaped newline, or '?' a trigraph. Let
934 skip_escaped_newlines do all the work. */
936 unsigned int lineno = buffer->lineno;
938 c = skip_escaped_newlines (buffer, c);
939 if (lineno != buffer->lineno)
940 /* We had at least one escaped newline of some sort, and the
941 next character is in buffer->read_ahead. Update the
942 token's line and column. */
943 goto next_char;
945 /* We are either the original '?' or '\\', or a trigraph. */
946 result->type = CPP_QUERY;
947 buffer->read_ahead = EOF;
948 if (c == '\\')
949 goto random_char;
950 else if (c != '?')
951 goto do_switch;
953 break;
955 case '0': case '1': case '2': case '3': case '4':
956 case '5': case '6': case '7': case '8': case '9':
957 result->type = CPP_NUMBER;
958 parse_number (pfile, &result->val.str, c, 0);
959 break;
961 case '$':
962 if (!CPP_OPTION (pfile, dollars_in_ident))
963 goto random_char;
964 /* Fall through... */
966 case '_':
967 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
968 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
969 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
970 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
971 case 'y': case 'z':
972 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
973 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
974 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
975 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
976 case 'Y': case 'Z':
977 result->type = CPP_NAME;
978 result->val.node = parse_identifier (pfile, c);
980 /* 'L' may introduce wide characters or strings. */
981 if (result->val.node == pfile->spec_nodes.n_L)
983 c = buffer->read_ahead; /* For make_string. */
984 if (c == '\'' || c == '"')
986 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
987 goto make_string;
990 /* Convert named operators to their proper types. */
991 else if (result->val.node->flags & NODE_OPERATOR)
993 result->flags |= NAMED_OP;
994 result->type = result->val.node->value.operator;
996 break;
998 case '\'':
999 case '"':
1000 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1001 make_string:
1002 parse_string (pfile, result, c);
1003 break;
1005 case '/':
1006 /* A potential block or line comment. */
1007 comment_start = buffer->cur;
1008 result->type = CPP_DIV;
1009 c = get_effective_char (buffer);
1010 if (c == '=')
1011 ACCEPT_CHAR (CPP_DIV_EQ);
1012 if (c != '/' && c != '*')
1013 break;
1014 if (buffer->from_stage3)
1015 break;
1017 if (c == '*')
1019 if (skip_block_comment (pfile))
1020 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1021 pfile->lexer_pos.col,
1022 "unterminated comment");
1024 else
1026 if (!CPP_OPTION (pfile, cplusplus_comments)
1027 && !CPP_IN_SYSTEM_HEADER (pfile))
1028 break;
1030 /* Warn about comments only if pedantically GNUC89, and not
1031 in system headers. */
1032 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1033 && ! buffer->warned_cplusplus_comments)
1035 cpp_pedwarn (pfile,
1036 "C++ style comments are not allowed in ISO C89");
1037 cpp_pedwarn (pfile,
1038 "(this will be reported only once per input file)");
1039 buffer->warned_cplusplus_comments = 1;
1042 /* Skip_line_comment updates buffer->read_ahead. */
1043 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1044 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1045 pfile->lexer_pos.col,
1046 "multi-line comment");
1049 /* Skipping the comment has updated buffer->read_ahead. */
1050 if (!pfile->state.save_comments)
1052 result->flags |= PREV_WHITE;
1053 goto next_char;
1056 /* Save the comment as a token in its own right. */
1057 save_comment (pfile, result, comment_start);
1058 /* Don't do MI optimisation. */
1059 return;
1061 case '<':
1062 if (pfile->state.angled_headers)
1064 result->type = CPP_HEADER_NAME;
1065 c = '>'; /* terminator. */
1066 goto make_string;
1069 result->type = CPP_LESS;
1070 c = get_effective_char (buffer);
1071 if (c == '=')
1072 ACCEPT_CHAR (CPP_LESS_EQ);
1073 else if (c == '<')
1075 ACCEPT_CHAR (CPP_LSHIFT);
1076 if (get_effective_char (buffer) == '=')
1077 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1079 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1081 ACCEPT_CHAR (CPP_MIN);
1082 if (get_effective_char (buffer) == '=')
1083 ACCEPT_CHAR (CPP_MIN_EQ);
1085 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1087 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1088 result->flags |= DIGRAPH;
1090 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1092 ACCEPT_CHAR (CPP_OPEN_BRACE);
1093 result->flags |= DIGRAPH;
1095 break;
1097 case '>':
1098 result->type = CPP_GREATER;
1099 c = get_effective_char (buffer);
1100 if (c == '=')
1101 ACCEPT_CHAR (CPP_GREATER_EQ);
1102 else if (c == '>')
1104 ACCEPT_CHAR (CPP_RSHIFT);
1105 if (get_effective_char (buffer) == '=')
1106 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1108 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1110 ACCEPT_CHAR (CPP_MAX);
1111 if (get_effective_char (buffer) == '=')
1112 ACCEPT_CHAR (CPP_MAX_EQ);
1114 break;
1116 case '%':
1117 lex_percent (buffer, result);
1118 if (result->type == CPP_HASH)
1119 goto do_hash;
1120 break;
1122 case '.':
1123 lex_dot (pfile, result);
1124 break;
1126 case '+':
1127 result->type = CPP_PLUS;
1128 c = get_effective_char (buffer);
1129 if (c == '=')
1130 ACCEPT_CHAR (CPP_PLUS_EQ);
1131 else if (c == '+')
1132 ACCEPT_CHAR (CPP_PLUS_PLUS);
1133 break;
1135 case '-':
1136 result->type = CPP_MINUS;
1137 c = get_effective_char (buffer);
1138 if (c == '>')
1140 ACCEPT_CHAR (CPP_DEREF);
1141 if (CPP_OPTION (pfile, cplusplus)
1142 && get_effective_char (buffer) == '*')
1143 ACCEPT_CHAR (CPP_DEREF_STAR);
1145 else if (c == '=')
1146 ACCEPT_CHAR (CPP_MINUS_EQ);
1147 else if (c == '-')
1148 ACCEPT_CHAR (CPP_MINUS_MINUS);
1149 break;
1151 case '*':
1152 result->type = CPP_MULT;
1153 if (get_effective_char (buffer) == '=')
1154 ACCEPT_CHAR (CPP_MULT_EQ);
1155 break;
1157 case '=':
1158 result->type = CPP_EQ;
1159 if (get_effective_char (buffer) == '=')
1160 ACCEPT_CHAR (CPP_EQ_EQ);
1161 break;
1163 case '!':
1164 result->type = CPP_NOT;
1165 if (get_effective_char (buffer) == '=')
1166 ACCEPT_CHAR (CPP_NOT_EQ);
1167 break;
1169 case '&':
1170 result->type = CPP_AND;
1171 c = get_effective_char (buffer);
1172 if (c == '=')
1173 ACCEPT_CHAR (CPP_AND_EQ);
1174 else if (c == '&')
1175 ACCEPT_CHAR (CPP_AND_AND);
1176 break;
1178 case '#':
1179 c = buffer->extra_char; /* Can be set by error condition below. */
1180 if (c != EOF)
1182 buffer->read_ahead = c;
1183 buffer->extra_char = EOF;
1185 else
1186 c = get_effective_char (buffer);
1188 if (c == '#')
1190 ACCEPT_CHAR (CPP_PASTE);
1191 break;
1194 result->type = CPP_HASH;
1195 do_hash:
1196 if (!bol)
1197 break;
1198 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1199 tokens within the list of arguments that would otherwise act
1200 as preprocessing directives, the behavior is undefined.
1202 This implementation will report a hard error, terminate the
1203 macro invocation, and proceed to process the directive. */
1204 if (pfile->state.parsing_args)
1206 if (pfile->state.parsing_args == 2)
1207 cpp_error (pfile,
1208 "directives may not be used inside a macro argument");
1210 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1211 buffer->extra_char = buffer->read_ahead;
1212 buffer->read_ahead = '#';
1213 pfile->state.next_bol = 1;
1214 result->type = CPP_EOF;
1216 /* Get whitespace right - newline_in_args sets it. */
1217 if (pfile->lexer_pos.col == 1)
1218 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1220 else
1222 /* This is the hash introducing a directive. */
1223 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1224 goto done_directive; /* bol still 1. */
1225 /* This is in fact an assembler #. */
1227 break;
1229 case '|':
1230 result->type = CPP_OR;
1231 c = get_effective_char (buffer);
1232 if (c == '=')
1233 ACCEPT_CHAR (CPP_OR_EQ);
1234 else if (c == '|')
1235 ACCEPT_CHAR (CPP_OR_OR);
1236 break;
1238 case '^':
1239 result->type = CPP_XOR;
1240 if (get_effective_char (buffer) == '=')
1241 ACCEPT_CHAR (CPP_XOR_EQ);
1242 break;
1244 case ':':
1245 result->type = CPP_COLON;
1246 c = get_effective_char (buffer);
1247 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1248 ACCEPT_CHAR (CPP_SCOPE);
1249 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1251 result->flags |= DIGRAPH;
1252 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1254 break;
1256 case '~': result->type = CPP_COMPL; break;
1257 case ',': result->type = CPP_COMMA; break;
1258 case '(': result->type = CPP_OPEN_PAREN; break;
1259 case ')': result->type = CPP_CLOSE_PAREN; break;
1260 case '[': result->type = CPP_OPEN_SQUARE; break;
1261 case ']': result->type = CPP_CLOSE_SQUARE; break;
1262 case '{': result->type = CPP_OPEN_BRACE; break;
1263 case '}': result->type = CPP_CLOSE_BRACE; break;
1264 case ';': result->type = CPP_SEMICOLON; break;
1266 /* @ is a punctuator in Objective C. */
1267 case '@': result->type = CPP_ATSIGN; break;
1269 random_char:
1270 default:
1271 result->type = CPP_OTHER;
1272 result->val.c = c;
1273 break;
1276 if (pfile->skipping)
1277 goto skip;
1279 /* If not in a directive, this token invalidates controlling macros. */
1280 if (!pfile->state.in_directive)
1281 pfile->mi_state = MI_FAILED;
1284 /* An upper bound on the number of bytes needed to spell a token,
1285 including preceding whitespace. */
1286 unsigned int
1287 cpp_token_len (token)
1288 const cpp_token *token;
1290 unsigned int len;
1292 switch (TOKEN_SPELL (token))
1294 default: len = 0; break;
1295 case SPELL_STRING: len = token->val.str.len; break;
1296 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1298 /* 1 for whitespace, 4 for comment delimeters. */
1299 return len + 5;
1302 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1303 already contain the enough space to hold the token's spelling.
1304 Returns a pointer to the character after the last character
1305 written. */
1306 unsigned char *
1307 cpp_spell_token (pfile, token, buffer)
1308 cpp_reader *pfile; /* Would be nice to be rid of this... */
1309 const cpp_token *token;
1310 unsigned char *buffer;
1312 switch (TOKEN_SPELL (token))
1314 case SPELL_OPERATOR:
1316 const unsigned char *spelling;
1317 unsigned char c;
1319 if (token->flags & DIGRAPH)
1320 spelling
1321 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1322 else if (token->flags & NAMED_OP)
1323 goto spell_ident;
1324 else
1325 spelling = TOKEN_NAME (token);
1327 while ((c = *spelling++) != '\0')
1328 *buffer++ = c;
1330 break;
1332 case SPELL_IDENT:
1333 spell_ident:
1334 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1335 buffer += NODE_LEN (token->val.node);
1336 break;
1338 case SPELL_STRING:
1340 int left, right, tag;
1341 switch (token->type)
1343 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1344 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1345 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1346 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1347 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1348 default: left = '\0'; right = '\0'; tag = '\0'; break;
1350 if (tag) *buffer++ = tag;
1351 if (left) *buffer++ = left;
1352 memcpy (buffer, token->val.str.text, token->val.str.len);
1353 buffer += token->val.str.len;
1354 if (right) *buffer++ = right;
1356 break;
1358 case SPELL_CHAR:
1359 *buffer++ = token->val.c;
1360 break;
1362 case SPELL_NONE:
1363 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1364 break;
1367 return buffer;
1370 /* Returns a token as a null-terminated string. The string is
1371 temporary, and automatically freed later. Useful for diagnostics. */
1372 unsigned char *
1373 cpp_token_as_text (pfile, token)
1374 cpp_reader *pfile;
1375 const cpp_token *token;
1377 unsigned int len = cpp_token_len (token);
1378 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1380 end = cpp_spell_token (pfile, token, start);
1381 end[0] = '\0';
1383 return start;
1386 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1387 const char *
1388 cpp_type2name (type)
1389 enum cpp_ttype type;
1391 return (const char *) token_spellings[type].name;
1394 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1395 for efficiency - to avoid double-buffering. Also, outputs a space
1396 if PREV_WHITE is flagged. */
1397 void
1398 cpp_output_token (token, fp)
1399 const cpp_token *token;
1400 FILE *fp;
1402 if (token->flags & PREV_WHITE)
1403 putc (' ', fp);
1405 switch (TOKEN_SPELL (token))
1407 case SPELL_OPERATOR:
1409 const unsigned char *spelling;
1411 if (token->flags & DIGRAPH)
1412 spelling
1413 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1414 else if (token->flags & NAMED_OP)
1415 goto spell_ident;
1416 else
1417 spelling = TOKEN_NAME (token);
1419 ufputs (spelling, fp);
1421 break;
1423 spell_ident:
1424 case SPELL_IDENT:
1425 ufputs (NODE_NAME (token->val.node), fp);
1426 break;
1428 case SPELL_STRING:
1430 int left, right, tag;
1431 switch (token->type)
1433 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1434 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1435 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1436 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1437 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1438 default: left = '\0'; right = '\0'; tag = '\0'; break;
1440 if (tag) putc (tag, fp);
1441 if (left) putc (left, fp);
1442 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1443 if (right) putc (right, fp);
1445 break;
1447 case SPELL_CHAR:
1448 putc (token->val.c, fp);
1449 break;
1451 case SPELL_NONE:
1452 /* An error, most probably. */
1453 break;
1457 /* Compare two tokens. */
1459 _cpp_equiv_tokens (a, b)
1460 const cpp_token *a, *b;
1462 if (a->type == b->type && a->flags == b->flags)
1463 switch (TOKEN_SPELL (a))
1465 default: /* Keep compiler happy. */
1466 case SPELL_OPERATOR:
1467 return 1;
1468 case SPELL_CHAR:
1469 return a->val.c == b->val.c; /* Character. */
1470 case SPELL_NONE:
1471 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1472 case SPELL_IDENT:
1473 return a->val.node == b->val.node;
1474 case SPELL_STRING:
1475 return (a->val.str.len == b->val.str.len
1476 && !memcmp (a->val.str.text, b->val.str.text,
1477 a->val.str.len));
1480 return 0;
1483 /* Determine whether two tokens can be pasted together, and if so,
1484 what the resulting token is. Returns CPP_EOF if the tokens cannot
1485 be pasted, or the appropriate type for the merged token if they
1486 can. */
1487 enum cpp_ttype
1488 cpp_can_paste (pfile, token1, token2, digraph)
1489 cpp_reader * pfile;
1490 const cpp_token *token1, *token2;
1491 int* digraph;
1493 enum cpp_ttype a = token1->type, b = token2->type;
1494 int cxx = CPP_OPTION (pfile, cplusplus);
1496 /* Treat named operators as if they were ordinary NAMEs. */
1497 if (token1->flags & NAMED_OP)
1498 a = CPP_NAME;
1499 if (token2->flags & NAMED_OP)
1500 b = CPP_NAME;
1502 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1503 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1505 switch (a)
1507 case CPP_GREATER:
1508 if (b == a) return CPP_RSHIFT;
1509 if (b == CPP_QUERY && cxx) return CPP_MAX;
1510 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1511 break;
1512 case CPP_LESS:
1513 if (b == a) return CPP_LSHIFT;
1514 if (b == CPP_QUERY && cxx) return CPP_MIN;
1515 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1516 if (CPP_OPTION (pfile, digraphs))
1518 if (b == CPP_COLON)
1519 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1520 if (b == CPP_MOD)
1521 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1523 break;
1525 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1526 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1527 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1529 case CPP_MINUS:
1530 if (b == a) return CPP_MINUS_MINUS;
1531 if (b == CPP_GREATER) return CPP_DEREF;
1532 break;
1533 case CPP_COLON:
1534 if (b == a && cxx) return CPP_SCOPE;
1535 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1536 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1537 break;
1539 case CPP_MOD:
1540 if (CPP_OPTION (pfile, digraphs))
1542 if (b == CPP_GREATER)
1543 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1544 if (b == CPP_COLON)
1545 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1547 break;
1548 case CPP_DEREF:
1549 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1550 break;
1551 case CPP_DOT:
1552 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1553 if (b == CPP_NUMBER) return CPP_NUMBER;
1554 break;
1556 case CPP_HASH:
1557 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1558 /* %:%: digraph */
1559 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1560 break;
1562 case CPP_NAME:
1563 if (b == CPP_NAME) return CPP_NAME;
1564 if (b == CPP_NUMBER
1565 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1566 if (b == CPP_CHAR
1567 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1568 if (b == CPP_STRING
1569 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1570 break;
1572 case CPP_NUMBER:
1573 if (b == CPP_NUMBER) return CPP_NUMBER;
1574 if (b == CPP_NAME) return CPP_NUMBER;
1575 if (b == CPP_DOT) return CPP_NUMBER;
1576 /* Numbers cannot have length zero, so this is safe. */
1577 if ((b == CPP_PLUS || b == CPP_MINUS)
1578 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1579 return CPP_NUMBER;
1580 break;
1582 default:
1583 break;
1586 return CPP_EOF;
1589 /* Returns nonzero if a space should be inserted to avoid an
1590 accidental token paste for output. For simplicity, it is
1591 conservative, and occasionally advises a space where one is not
1592 needed, e.g. "." and ".2". */
1595 cpp_avoid_paste (pfile, token1, token2)
1596 cpp_reader *pfile;
1597 const cpp_token *token1, *token2;
1599 enum cpp_ttype a = token1->type, b = token2->type;
1600 cppchar_t c;
1602 if (token1->flags & NAMED_OP)
1603 a = CPP_NAME;
1604 if (token2->flags & NAMED_OP)
1605 b = CPP_NAME;
1607 c = EOF;
1608 if (token2->flags & DIGRAPH)
1609 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1610 else if (token_spellings[b].category == SPELL_OPERATOR)
1611 c = token_spellings[b].name[0];
1613 /* Quickly get everything that can paste with an '='. */
1614 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1615 return 1;
1617 switch (a)
1619 case CPP_GREATER: return c == '>' || c == '?';
1620 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1621 case CPP_PLUS: return c == '+';
1622 case CPP_MINUS: return c == '-' || c == '>';
1623 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1624 case CPP_MOD: return c == ':' || c == '>';
1625 case CPP_AND: return c == '&';
1626 case CPP_OR: return c == '|';
1627 case CPP_COLON: return c == ':' || c == '>';
1628 case CPP_DEREF: return c == '*';
1629 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1630 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1631 case CPP_NAME: return ((b == CPP_NUMBER
1632 && name_p (pfile, &token2->val.str))
1633 || b == CPP_NAME
1634 || b == CPP_CHAR || b == CPP_STRING); /* L */
1635 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1636 || c == '.' || c == '+' || c == '-');
1637 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1638 && token1->val.c == '@'
1639 && (b == CPP_NAME || b == CPP_STRING));
1640 default: break;
1643 return 0;
1646 /* Output all the remaining tokens on the current line, and a newline
1647 character, to FP. Leading whitespace is removed. */
1648 void
1649 cpp_output_line (pfile, fp)
1650 cpp_reader *pfile;
1651 FILE *fp;
1653 cpp_token token;
1655 cpp_get_token (pfile, &token);
1656 token.flags &= ~PREV_WHITE;
1657 while (token.type != CPP_EOF)
1659 cpp_output_token (&token, fp);
1660 cpp_get_token (pfile, &token);
1663 putc ('\n', fp);
/* Returns the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1680 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
1682 [lex.charset]: The character designated by the universal character
1683 name \UNNNNNNNN is that character whose character short name in
1684 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1685 universal character name \uNNNN is that character whose character
1686 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1687 for a universal character name is less than 0x20 or in the range
1688 0x7F-0x9F (inclusive), or if the universal character name
1689 designates a character in the basic source character set, then the
1690 program is ill-formed.
1692 We assume that wchar_t is Unicode, so we don't need to do any
1693 mapping. Is this ever wrong? */
1695 static unsigned int
1696 read_ucs (pfile, pstr, limit, length)
1697 cpp_reader *pfile;
1698 const unsigned char **pstr;
1699 const unsigned char *limit;
1700 unsigned int length;
1702 const unsigned char *p = *pstr;
1703 unsigned int c, code = 0;
1705 for (; length; --length)
1707 if (p >= limit)
1709 cpp_error (pfile, "incomplete universal-character-name");
1710 break;
1713 c = *p;
1714 if (ISXDIGIT (c))
1716 code = (code << 4) + hex_digit_value (c);
1717 p++;
1719 else
1721 cpp_error (pfile,
1722 "non-hex digit '%c' in universal-character-name", c);
1723 break;
1728 #ifdef TARGET_EBCDIC
1729 cpp_error (pfile, "universal-character-name on EBCDIC target");
1730 code = 0x3f; /* EBCDIC invalid character */
1731 #else
1732 if (code > 0x9f && !(code & 0x80000000))
1733 ; /* True extended character, OK. */
1734 else if (code >= 0x20 && code < 0x7f)
1736 /* ASCII printable character. The C character set consists of all of
1737 these except $, @ and `. We use hex escapes so that this also
1738 works with EBCDIC hosts. */
1739 if (code != 0x24 && code != 0x40 && code != 0x60)
1740 cpp_error (pfile, "universal-character-name used for '%c'", code);
1742 else
1743 cpp_error (pfile, "invalid universal-character-name");
1744 #endif
1746 *pstr = p;
1747 return code;
1750 /* Interpret an escape sequence, and return its value. PSTR points to
1751 the input pointer, which is just after the backslash. LIMIT is how
1752 much text we have. MASK is the precision for the target type (char
1753 or wchar_t). TRADITIONAL, if true, does not interpret escapes that
1754 did not exist in traditional C. */
1756 static unsigned int
1757 parse_escape (pfile, pstr, limit, mask, traditional)
1758 cpp_reader *pfile;
1759 const unsigned char **pstr;
1760 const unsigned char *limit;
1761 HOST_WIDE_INT mask;
1762 int traditional;
1764 int unknown = 0;
1765 const unsigned char *str = *pstr;
1766 unsigned int c = *str++;
1768 switch (c)
1770 case '\\': case '\'': case '"': case '?': break;
1771 case 'b': c = TARGET_BS; break;
1772 case 'f': c = TARGET_FF; break;
1773 case 'n': c = TARGET_NEWLINE; break;
1774 case 'r': c = TARGET_CR; break;
1775 case 't': c = TARGET_TAB; break;
1776 case 'v': c = TARGET_VT; break;
1778 case '(': case '{': case '[': case '%':
1779 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1780 '\%' is used to prevent SCCS from getting confused. */
1781 unknown = CPP_PEDANTIC (pfile);
1782 break;
1784 case 'a':
1785 if (CPP_WTRADITIONAL (pfile))
1786 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1787 if (!traditional)
1788 c = TARGET_BELL;
1789 break;
1791 case 'e': case 'E':
1792 if (CPP_PEDANTIC (pfile))
1793 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1794 c = TARGET_ESC;
1795 break;
1797 /* Warnings and support checks handled by read_ucs(). */
1798 case 'u': case 'U':
1799 if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
1801 if (CPP_WTRADITIONAL (pfile))
1802 cpp_warning (pfile,
1803 "the meaning of '\\%c' varies with -traditional", c);
1804 c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
1806 else
1807 unknown = 1;
1808 break;
1810 case 'x':
1811 if (CPP_WTRADITIONAL (pfile))
1812 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1814 if (!traditional)
1816 unsigned int i = 0, overflow = 0;
1817 int digits_found = 0;
1819 while (str < limit)
1821 c = *str;
1822 if (! ISXDIGIT (c))
1823 break;
1824 str++;
1825 overflow |= i ^ (i << 4 >> 4);
1826 i = (i << 4) + hex_digit_value (c);
1827 digits_found = 1;
1830 if (!digits_found)
1831 cpp_error (pfile, "\\x used with no following hex digits");
1833 if (overflow | (i != (i & mask)))
1835 cpp_pedwarn (pfile, "hex escape sequence out of range");
1836 i &= mask;
1838 c = i;
1840 break;
1842 case '0': case '1': case '2': case '3':
1843 case '4': case '5': case '6': case '7':
1845 unsigned int i = c - '0';
1846 int count = 0;
1848 while (str < limit && ++count < 3)
1850 c = *str;
1851 if (c < '0' || c > '7')
1852 break;
1853 str++;
1854 i = (i << 3) + c - '0';
1857 if (i != (i & mask))
1859 cpp_pedwarn (pfile, "octal escape sequence out of range");
1860 i &= mask;
1862 c = i;
1864 break;
1866 default:
1867 unknown = 1;
1868 break;
1871 if (unknown)
1873 if (ISGRAPH (c))
1874 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1875 else
1876 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1879 *pstr = str;
1880 return c;
/* When no separate maximum is defined, fall back to CHAR_TYPE_SIZE
   and WCHAR_TYPE_SIZE respectively.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1891 /* Interpret a (possibly wide) character constant in TOKEN.
1892 WARN_MULTI warns about multi-character charconsts, if not
1893 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1894 that did not exist in traditional C. PCHARS_SEEN points to a
1895 variable that is filled in with the number of characters seen. */
1896 HOST_WIDE_INT
1897 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1898 cpp_reader *pfile;
1899 const cpp_token *token;
1900 int warn_multi;
1901 int traditional;
1902 unsigned int *pchars_seen;
1904 const unsigned char *str = token->val.str.text;
1905 const unsigned char *limit = str + token->val.str.len;
1906 unsigned int chars_seen = 0;
1907 unsigned int width, max_chars, c;
1908 HOST_WIDE_INT result = 0, mask;
1910 #ifdef MULTIBYTE_CHARS
1911 (void) local_mbtowc (NULL, NULL, 0);
1912 #endif
1914 /* Width in bits. */
1915 if (token->type == CPP_CHAR)
1916 width = MAX_CHAR_TYPE_SIZE;
1917 else
1918 width = MAX_WCHAR_TYPE_SIZE;
1920 if (width < HOST_BITS_PER_WIDE_INT)
1921 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1922 else
1923 mask = ~0;
1924 max_chars = HOST_BITS_PER_WIDE_INT / width;
1926 while (str < limit)
1928 #ifdef MULTIBYTE_CHARS
1929 wchar_t wc;
1930 int char_len;
1932 char_len = local_mbtowc (&wc, str, limit - str);
1933 if (char_len == -1)
1935 cpp_warning (pfile, "ignoring invalid multibyte character");
1936 c = *str++;
1938 else
1940 str += char_len;
1941 c = wc;
1943 #else
1944 c = *str++;
1945 #endif
1947 if (c == '\\')
1949 c = parse_escape (pfile, &str, limit, mask, traditional);
1950 if (width < HOST_BITS_PER_WIDE_INT && c > mask)
1951 cpp_pedwarn (pfile, "escape sequence out of range for character");
1954 #ifdef MAP_CHARACTER
1955 if (ISPRINT (c))
1956 c = MAP_CHARACTER (c);
1957 #endif
1959 /* Merge character into result; ignore excess chars. */
1960 if (++chars_seen <= max_chars)
1962 if (width < HOST_BITS_PER_WIDE_INT)
1963 result = (result << width) | (c & mask);
1964 else
1965 result = c;
1969 if (chars_seen == 0)
1970 cpp_error (pfile, "empty character constant");
1971 else if (chars_seen > max_chars)
1973 chars_seen = max_chars;
1974 cpp_error (pfile, "character constant too long");
1976 else if (chars_seen > 1 && !traditional && warn_multi)
1977 cpp_warning (pfile, "multi-character character constant");
1979 /* If char type is signed, sign-extend the constant. The
1980 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1981 if (token->type == CPP_CHAR && chars_seen)
1983 unsigned int nbits = chars_seen * width;
1984 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1986 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1987 || ((result >> (nbits - 1)) & 1) == 0)
1988 result &= mask;
1989 else
1990 result |= ~mask;
1993 *pchars_seen = chars_seen;
1994 return result;
/* Memory pools.  */

/* Layout probe: the offset of U, which follows a single char, is the
   alignment the compiler gives to a union of a double and a
   pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for chunk sizes, and for pools that do not ask for a
   specific one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2011 static int
2012 chunk_suitable (pool, chunk, size)
2013 cpp_pool *pool;
2014 cpp_chunk *chunk;
2015 unsigned int size;
2017 /* Being at least twice SIZE means we can use memcpy in
2018 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2019 anyway. */
2020 return (chunk && pool->locked != chunk
2021 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2024 /* Returns the end of the new pool. PTR points to a char in the old
2025 pool, and is updated to point to the same char in the new pool. */
2026 unsigned char *
2027 _cpp_next_chunk (pool, len, ptr)
2028 cpp_pool *pool;
2029 unsigned int len;
2030 unsigned char **ptr;
2032 cpp_chunk *chunk = pool->cur->next;
2034 /* LEN is the minimum size we want in the new pool. */
2035 len += POOL_ROOM (pool);
2036 if (! chunk_suitable (pool, chunk, len))
2038 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2040 chunk->next = pool->cur->next;
2041 pool->cur->next = chunk;
2044 /* Update the pointer before changing chunk's front. */
2045 if (ptr)
2046 *ptr += chunk->base - POOL_FRONT (pool);
2048 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2049 chunk->front = chunk->base;
2051 pool->cur = chunk;
2052 return POOL_LIMIT (pool);
2055 static cpp_chunk *
2056 new_chunk (size)
2057 unsigned int size;
2059 unsigned char *base;
2060 cpp_chunk *result;
2062 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2063 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2064 /* Put the chunk descriptor at the end. Then chunk overruns will
2065 cause obvious chaos. */
2066 result = (cpp_chunk *) (base + size);
2067 result->base = base;
2068 result->front = base;
2069 result->limit = base + size;
2070 result->next = 0;
2072 return result;
2075 void
2076 _cpp_init_pool (pool, size, align, temp)
2077 cpp_pool *pool;
2078 unsigned int size, align, temp;
2080 if (align == 0)
2081 align = DEFAULT_ALIGNMENT;
2082 if (align & (align - 1))
2083 abort ();
2084 pool->align = align;
2085 pool->cur = new_chunk (size);
2086 pool->locked = 0;
2087 pool->locks = 0;
2088 if (temp)
2089 pool->cur->next = pool->cur;
2092 void
2093 _cpp_lock_pool (pool)
2094 cpp_pool *pool;
2096 if (pool->locks++ == 0)
2097 pool->locked = pool->cur;
2100 void
2101 _cpp_unlock_pool (pool)
2102 cpp_pool *pool;
2104 if (--pool->locks == 0)
2105 pool->locked = 0;
2108 void
2109 _cpp_free_pool (pool)
2110 cpp_pool *pool;
2112 cpp_chunk *chunk = pool->cur, *next;
2116 next = chunk->next;
2117 free (chunk->base);
2118 chunk = next;
2120 while (chunk && chunk != pool->cur);
2123 /* Reserve LEN bytes from a memory pool. */
2124 unsigned char *
2125 _cpp_pool_reserve (pool, len)
2126 cpp_pool *pool;
2127 unsigned int len;
2129 len = POOL_ALIGN (len, pool->align);
2130 if (len > (unsigned int) POOL_ROOM (pool))
2131 _cpp_next_chunk (pool, len, 0);
2133 return POOL_FRONT (pool);
2136 /* Allocate LEN bytes from a memory pool. */
2137 unsigned char *
2138 _cpp_pool_alloc (pool, len)
2139 cpp_pool *pool;
2140 unsigned int len;
2142 unsigned char *result = _cpp_pool_reserve (pool, len);
2144 POOL_COMMIT (pool, len);
2145 return result;